JavaScript parser: properly consume closing brace after a block in findCmdTerm()
[geany-mirror.git] / tagmanager / ctags / js.c
blobebe3616564c456188afbfd83175dffc2972425a1
1 /*
2 * Copyright (c) 2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for JavaScript language
8 * files.
10 * This is a good reference for different forms of the function statement:
11 * http://www.permadi.com/tutorial/jsFunc/
12 * Another good reference:
13 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
17 * INCLUDE FILES
19 #include "general.h" /* must always come first */
20 #include <ctype.h> /* to define isalpha () */
21 #include <string.h>
22 #include <setjmp.h>
23 #include <mio/mio.h>
24 #ifdef DEBUG
25 #include <stdio.h>
26 #endif
28 #include "keyword.h"
29 #include "parse.h"
30 #include "read.h"
31 #include "main.h"
32 #include "vstring.h"
/*
 *   MACROS
 */
/* Test a token's type / keyword id.  Both yield a boolean.  The complete
 * expansion is parenthesized so the cast cannot associate with an operator
 * that happens to follow the macro at the point of use. */
#define isType(token,t)		((boolean) ((token)->type == (t)))
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))
41 * DATA DECLARATIONS
44 typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
47 * Tracks class and function names already created
49 static stringList *ClassNames;
50 static stringList *FunctionNames;
/* Used to specify type of keyword.
 * The ids are paired with their spelling in JsKeywordTable.
 */
typedef enum eKeywordId {
	KEYWORD_NONE = -1,	/* token is not a keyword */
	KEYWORD_function,
	KEYWORD_capital_function,
	KEYWORD_object,
	KEYWORD_capital_object,
	KEYWORD_prototype,
	KEYWORD_var,
	KEYWORD_new,
	KEYWORD_this,
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally
} keywordId;
75 /* Used to determine whether keyword is valid for the token language and
76 * what its ID is.
78 typedef struct sKeywordDesc {
79 const char *name;
80 keywordId id;
81 } keywordDesc;
/* Lexical category assigned to a token by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,		/* anything readToken() does not classify */
	TOKEN_CHARACTER,		/* a backslash sequence */
	TOKEN_CLOSE_PAREN,		/* ) */
	TOKEN_SEMICOLON,		/* ; */
	TOKEN_COLON,			/* : */
	TOKEN_COMMA,			/* , */
	TOKEN_KEYWORD,			/* identifier found in the keyword table */
	TOKEN_OPEN_PAREN,		/* ( */
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,		/* identifier that is not a keyword */
	TOKEN_STRING,			/* '...' or "..." */
	TOKEN_PERIOD,			/* . */
	TOKEN_OPEN_CURLY,		/* { */
	TOKEN_CLOSE_CURLY,		/* } */
	TOKEN_EQUAL_SIGN,		/* = */
	TOKEN_FORWARD_SLASH,	/* a slash that does not start a comment */
	TOKEN_OPEN_SQUARE,		/* [ */
	TOKEN_CLOSE_SQUARE		/* ] */
} tokenType;
104 typedef struct sTokenInfo {
105 tokenType type;
106 keywordId keyword;
107 vString * string;
108 vString * scope;
109 unsigned long lineNumber;
110 MIOPos filePosition;
111 int nestLevel;
112 boolean ignoreTag;
113 } tokenInfo;
116 * DATA DEFINITIONS
119 static langType Lang_js;
121 static jmp_buf Exception;
/* Indexes into JsKinds; JSTAG_COUNT must equal the number of entries there
 * (asserted in initialize()). */
typedef enum {
	JSTAG_FUNCTION,
	JSTAG_CLASS,
	JSTAG_METHOD,
	JSTAG_PROPERTY,
	JSTAG_VARIABLE,
	JSTAG_COUNT
} jsKind;
132 static kindOption JsKinds [] = {
133 { TRUE, 'f', "function", "functions" },
134 { TRUE, 'c', "class", "classes" },
135 { TRUE, 'm', "method", "methods" },
136 { TRUE, 'p', "member", "properties" },
137 { TRUE, 'v', "variable", "global variables" }
140 static const keywordDesc JsKeywordTable [] = {
141 /* keyword keyword ID */
142 { "function", KEYWORD_function },
143 { "Function", KEYWORD_capital_function },
144 { "object", KEYWORD_object },
145 { "Object", KEYWORD_capital_object },
146 { "prototype", KEYWORD_prototype },
147 { "var", KEYWORD_var },
148 { "new", KEYWORD_new },
149 { "this", KEYWORD_this },
150 { "for", KEYWORD_for },
151 { "while", KEYWORD_while },
152 { "do", KEYWORD_do },
153 { "if", KEYWORD_if },
154 { "else", KEYWORD_else },
155 { "switch", KEYWORD_switch },
156 { "try", KEYWORD_try },
157 { "catch", KEYWORD_catch },
158 { "finally", KEYWORD_finally }
162 * FUNCTION DEFINITIONS
165 /* Recursive functions */
166 static void parseFunction (tokenInfo *const token);
167 static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent);
168 static boolean parseLine (tokenInfo *const token, boolean is_inside_class);
170 static boolean isIdentChar (const int c)
172 return (boolean)
173 (isalpha (c) || isdigit (c) || c == '$' ||
174 c == '@' || c == '_' || c == '#');
177 static void buildJsKeywordHash (void)
179 const size_t count = sizeof (JsKeywordTable) /
180 sizeof (JsKeywordTable [0]);
181 size_t i;
182 for (i = 0 ; i < count ; ++i)
184 const keywordDesc* const p = &JsKeywordTable [i];
185 addKeyword (p->name, Lang_js, (int) p->id);
189 static tokenInfo *newToken (void)
191 tokenInfo *const token = xMalloc (1, tokenInfo);
193 token->type = TOKEN_UNDEFINED;
194 token->keyword = KEYWORD_NONE;
195 token->string = vStringNew ();
196 token->scope = vStringNew ();
197 token->nestLevel = 0;
198 token->ignoreTag = FALSE;
199 token->lineNumber = getSourceLineNumber ();
200 token->filePosition = getInputFilePosition ();
202 return token;
205 static void deleteToken (tokenInfo *const token)
207 vStringDelete (token->string);
208 vStringDelete (token->scope);
209 eFree (token);
213 * Tag generation functions
216 static void makeJsTag (tokenInfo *const token, const jsKind kind)
218 if (JsKinds [kind].enabled && ! token->ignoreTag )
220 const char *name = vStringValue (token->string);
221 vString *fullscope = vStringNewCopy (token->scope);
222 const char *p;
223 tagEntryInfo e;
225 if ((p = strrchr (name, '.')) != NULL)
227 if (vStringLength (fullscope) > 0)
228 vStringPut (fullscope, '.');
229 vStringNCatS (fullscope, name, p - name);
230 name = p + 1;
233 initTagEntry (&e, name);
235 e.lineNumber = token->lineNumber;
236 e.filePosition = token->filePosition;
237 e.kindName = JsKinds [kind].name;
238 e.kind = JsKinds [kind].letter;
240 if ( vStringLength(fullscope) > 0 )
242 jsKind parent_kind = JSTAG_CLASS;
244 /* if we're creating a function (and not a method),
245 * guess we're inside another function */
246 if (kind == JSTAG_FUNCTION)
247 parent_kind = JSTAG_FUNCTION;
249 e.extensionFields.scope[0] = JsKinds [parent_kind].name;
250 e.extensionFields.scope[1] = vStringValue (fullscope);
253 makeTagEntry (&e);
255 vStringDelete (fullscope);
259 static void makeClassTag (tokenInfo *const token)
261 vString * fulltag;
263 if ( ! token->ignoreTag )
265 fulltag = vStringNew ();
266 if (vStringLength (token->scope) > 0)
268 vStringCopy(fulltag, token->scope);
269 vStringCatS (fulltag, ".");
270 vStringCatS (fulltag, vStringValue(token->string));
272 else
274 vStringCopy(fulltag, token->string);
276 vStringTerminate(fulltag);
277 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
279 stringListAdd (ClassNames, vStringNewCopy (fulltag));
280 makeJsTag (token, JSTAG_CLASS);
282 vStringDelete (fulltag);
286 static void makeFunctionTag (tokenInfo *const token)
288 vString * fulltag;
290 if ( ! token->ignoreTag )
292 fulltag = vStringNew ();
293 if (vStringLength (token->scope) > 0)
295 vStringCopy(fulltag, token->scope);
296 vStringCatS (fulltag, ".");
297 vStringCatS (fulltag, vStringValue(token->string));
299 else
301 vStringCopy(fulltag, token->string);
303 vStringTerminate(fulltag);
304 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
306 stringListAdd (FunctionNames, vStringNewCopy (fulltag));
307 makeJsTag (token, JSTAG_FUNCTION);
309 vStringDelete (fulltag);
/*
 *	 Parsing functions
 */

/* Consumes input up to and including the next occurrence of `c`.
 * Returns `c`, or EOF if the input ended first. */
static int skipToCharacter (const int c)
{
	for (;;)
	{
		const int seen = fileGetc ();
		if (seen == EOF || seen == c)
			return seen;
	}
}
327 static void parseString (vString *const string, const int delimiter)
329 boolean end = FALSE;
330 while (! end)
332 int c = fileGetc ();
333 if (c == EOF)
334 end = TRUE;
335 else if (c == '\\')
337 c = fileGetc(); /* This maybe a ' or ". */
338 vStringPut(string, c);
340 else if (c == delimiter)
341 end = TRUE;
342 else
343 vStringPut (string, c);
345 vStringTerminate (string);
348 /* Read a C identifier beginning with "firstChar" and places it into
349 * "name".
351 static void parseIdentifier (vString *const string, const int firstChar)
353 int c = firstChar;
354 Assert (isIdentChar (c));
357 vStringPut (string, c);
358 c = fileGetc ();
359 } while (isIdentChar (c));
360 vStringTerminate (string);
361 if (!isspace (c))
362 fileUngetc (c); /* unget non-identifier character */
365 static keywordId analyzeToken (vString *const name)
367 vString *keyword = vStringNew ();
368 keywordId result;
369 vStringCopyToLower (keyword, name);
370 result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
371 vStringDelete (keyword);
372 return result;
375 static void readToken (tokenInfo *const token)
377 int c;
379 token->type = TOKEN_UNDEFINED;
380 token->keyword = KEYWORD_NONE;
381 vStringClear (token->string);
383 getNextChar:
386 c = fileGetc ();
388 while (c == '\t' || c == ' ' || c == '\n');
390 token->lineNumber = getSourceLineNumber ();
391 token->filePosition = getInputFilePosition ();
393 switch (c)
395 case EOF: longjmp (Exception, (int)ExceptionEOF); break;
396 case '(': token->type = TOKEN_OPEN_PAREN; break;
397 case ')': token->type = TOKEN_CLOSE_PAREN; break;
398 case ';': token->type = TOKEN_SEMICOLON; break;
399 case ',': token->type = TOKEN_COMMA; break;
400 case '.': token->type = TOKEN_PERIOD; break;
401 case ':': token->type = TOKEN_COLON; break;
402 case '{': token->type = TOKEN_OPEN_CURLY; break;
403 case '}': token->type = TOKEN_CLOSE_CURLY; break;
404 case '=': token->type = TOKEN_EQUAL_SIGN; break;
405 case '[': token->type = TOKEN_OPEN_SQUARE; break;
406 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
408 case '\'':
409 case '"':
410 token->type = TOKEN_STRING;
411 parseString (token->string, c);
412 token->lineNumber = getSourceLineNumber ();
413 token->filePosition = getInputFilePosition ();
414 break;
416 case '\\':
417 c = fileGetc ();
418 if (c != '\\' && c != '"' && !isspace (c))
419 fileUngetc (c);
420 token->type = TOKEN_CHARACTER;
421 token->lineNumber = getSourceLineNumber ();
422 token->filePosition = getInputFilePosition ();
423 break;
425 case '/':
427 int d = fileGetc ();
428 if ( (d != '*') && /* is this the start of a comment? */
429 (d != '/') ) /* is a one line comment? */
431 token->type = TOKEN_FORWARD_SLASH;
432 fileUngetc (d);
434 else
436 if (d == '*')
440 skipToCharacter ('*');
441 c = fileGetc ();
442 if (c == '/')
443 break;
444 else
445 fileUngetc (c);
446 } while (c != EOF && c != '\0');
447 goto getNextChar;
449 else if (d == '/') /* is this the start of a comment? */
451 skipToCharacter ('\n');
452 goto getNextChar;
455 break;
458 default:
459 if (! isIdentChar (c))
460 token->type = TOKEN_UNDEFINED;
461 else
463 parseIdentifier (token->string, c);
464 token->lineNumber = getSourceLineNumber ();
465 token->filePosition = getInputFilePosition ();
466 token->keyword = analyzeToken (token->string);
467 if (isKeyword (token, KEYWORD_NONE))
468 token->type = TOKEN_IDENTIFIER;
469 else
470 token->type = TOKEN_KEYWORD;
472 break;
476 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
478 dest->nestLevel = src->nestLevel;
479 dest->lineNumber = src->lineNumber;
480 dest->filePosition = src->filePosition;
481 dest->type = src->type;
482 dest->keyword = src->keyword;
483 vStringCopy(dest->string, src->string);
484 vStringCopy(dest->scope, src->scope);
488 * Token parsing functions
491 static void skipArgumentList (tokenInfo *const token)
493 int nest_level = 0;
496 * Other databases can have arguments with fully declared
497 * datatypes:
498 * ( name varchar(30), text binary(10) )
499 * So we must check for nested open and closing parantheses
502 if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */
504 nest_level++;
505 while (! (isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0)))
507 readToken (token);
508 if (isType (token, TOKEN_OPEN_PAREN))
510 nest_level++;
512 if (isType (token, TOKEN_CLOSE_PAREN))
514 if (nest_level > 0)
516 nest_level--;
520 readToken (token);
524 static void skipArrayList (tokenInfo *const token)
526 int nest_level = 0;
529 * Handle square brackets
530 * var name[1]
531 * So we must check for nested open and closing square brackets
534 if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */
536 nest_level++;
537 while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0)))
539 readToken (token);
540 if (isType (token, TOKEN_OPEN_SQUARE))
542 nest_level++;
544 if (isType (token, TOKEN_CLOSE_SQUARE))
546 if (nest_level > 0)
548 nest_level--;
552 readToken (token);
556 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
558 if (vStringLength (parent->string) > 0)
560 vStringCatS (parent->string, ".");
562 vStringCatS (parent->string, vStringValue(child->string));
563 vStringTerminate(parent->string);
566 static void addToScope (tokenInfo* const token, vString* const extra)
568 if (vStringLength (token->scope) > 0)
570 vStringCatS (token->scope, ".");
572 vStringCatS (token->scope, vStringValue(extra));
573 vStringTerminate(token->scope);
577 * Scanning functions
580 static void findCmdTerm (tokenInfo *const token)
583 * Read until we find either a semicolon or closing brace.
584 * Any nested braces will be handled within.
586 while (! ( isType (token, TOKEN_SEMICOLON) ||
587 isType (token, TOKEN_CLOSE_CURLY) ) )
589 /* Handle nested blocks */
590 if ( isType (token, TOKEN_OPEN_CURLY))
592 parseBlock (token, token);
593 readToken (token);
595 else if ( isType (token, TOKEN_OPEN_PAREN) )
597 skipArgumentList(token);
599 else
601 readToken (token);
606 static void parseSwitch (tokenInfo *const token)
609 * switch (expression){
610 * case value1:
611 * statement;
612 * break;
613 * case value2:
614 * statement;
615 * break;
616 * default : statement;
620 readToken (token);
622 if (isType (token, TOKEN_OPEN_PAREN))
625 * Handle nameless functions, these will only
626 * be considered methods.
628 skipArgumentList(token);
631 if (isType (token, TOKEN_OPEN_CURLY))
634 * This will be either a function or a class.
635 * We can only determine this by checking the body
636 * of the function. If we find a "this." we know
637 * it is a class, otherwise it is a function.
639 parseBlock (token, token);
644 static void parseLoop (tokenInfo *const token)
647 * Handles these statements
648 * for (x=0; x<3; x++)
649 * document.write("This text is repeated three times<br>");
651 * for (x=0; x<3; x++)
653 * document.write("This text is repeated three times<br>");
656 * while (number<5){
657 * document.write(number+"<br>");
658 * number++;
661 * do{
662 * document.write(number+"<br>");
663 * number++;
665 * while (number<5);
668 if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
670 readToken(token);
672 if (isType (token, TOKEN_OPEN_PAREN))
675 * Handle nameless functions, these will only
676 * be considered methods.
678 skipArgumentList(token);
681 if (isType (token, TOKEN_OPEN_CURLY))
684 * This will be either a function or a class.
685 * We can only determine this by checking the body
686 * of the function. If we find a "this." we know
687 * it is a class, otherwise it is a function.
689 parseBlock (token, token);
691 else
693 parseLine(token, FALSE);
696 else if (isKeyword (token, KEYWORD_do))
698 readToken(token);
700 if (isType (token, TOKEN_OPEN_CURLY))
703 * This will be either a function or a class.
704 * We can only determine this by checking the body
705 * of the function. If we find a "this." we know
706 * it is a class, otherwise it is a function.
708 parseBlock (token, token);
710 else
712 parseLine(token, FALSE);
715 readToken(token);
717 if (isKeyword (token, KEYWORD_while))
719 readToken(token);
721 if (isType (token, TOKEN_OPEN_PAREN))
724 * Handle nameless functions, these will only
725 * be considered methods.
727 skipArgumentList(token);
733 static boolean parseIf (tokenInfo *const token)
735 boolean read_next_token = TRUE;
737 * If statements have two forms
738 * if ( ... )
739 * one line;
741 * if ( ... )
742 * statement;
743 * else
744 * statement
746 * if ( ... ) {
747 * multiple;
748 * statements;
752 * if ( ... ) {
753 * return elem
756 * This example if correctly written, but the
757 * else contains only 1 statement without a terminator
758 * since the function finishes with the closing brace.
760 * function a(flag){
761 * if(flag)
762 * test(1);
763 * else
764 * test(2)
767 * TODO: Deal with statements that can optional end
768 * without a semi-colon. Currently this messes up
769 * the parsing of blocks.
770 * Need to somehow detect this has happened, and either
771 * backup a token, or skip reading the next token if
772 * that is possible from all code locations.
776 readToken (token);
778 if (isKeyword (token, KEYWORD_if))
781 * Check for an "else if" and consume the "if"
783 readToken (token);
786 if (isType (token, TOKEN_OPEN_PAREN))
789 * Handle nameless functions, these will only
790 * be considered methods.
792 skipArgumentList(token);
795 if (isType (token, TOKEN_OPEN_CURLY))
798 * This will be either a function or a class.
799 * We can only determine this by checking the body
800 * of the function. If we find a "this." we know
801 * it is a class, otherwise it is a function.
803 parseBlock (token, token);
805 else
807 findCmdTerm (token);
809 /* The next token should only be read if this statement had its own
810 * terminator */
811 read_next_token = isType (token, TOKEN_SEMICOLON);
813 return read_next_token;
816 static void parseFunction (tokenInfo *const token)
818 tokenInfo *const name = newToken ();
819 boolean is_class = FALSE;
822 * This deals with these formats
823 * function validFunctionTwo(a,b) {}
826 readToken (name);
827 /* Add scope in case this is an INNER function */
828 addToScope(name, token->scope);
830 readToken (token);
831 while (isType (token, TOKEN_PERIOD))
833 readToken (token);
834 if ( isKeyword(token, KEYWORD_NONE) )
836 addContext (name, token);
837 readToken (token);
841 if ( isType (token, TOKEN_OPEN_PAREN) )
842 skipArgumentList(token);
844 if ( isType (token, TOKEN_OPEN_CURLY) )
846 is_class = parseBlock (token, name);
847 if ( is_class )
848 makeClassTag (name);
849 else
850 makeFunctionTag (name);
853 findCmdTerm (token);
855 deleteToken (name);
858 static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
860 boolean is_class = FALSE;
861 boolean read_next_token = TRUE;
862 vString * saveScope = vStringNew ();
864 token->nestLevel++;
866 * Make this routine a bit more forgiving.
867 * If called on an open_curly advance it
869 if ( isType (token, TOKEN_OPEN_CURLY) &&
870 isKeyword(token, KEYWORD_NONE) )
871 readToken(token);
873 if (! isType (token, TOKEN_CLOSE_CURLY))
876 * Read until we find the closing brace,
877 * any nested braces will be handled within
881 read_next_token = TRUE;
882 if (isKeyword (token, KEYWORD_this))
885 * Means we are inside a class and have found
886 * a class, not a function
888 is_class = TRUE;
889 vStringCopy(saveScope, token->scope);
890 addToScope (token, parent->string);
893 * Ignore the remainder of the line
894 * findCmdTerm(token);
896 parseLine (token, is_class);
898 vStringCopy(token->scope, saveScope);
900 else if (isKeyword (token, KEYWORD_var))
903 * Potentially we have found an inner function.
904 * Set something to indicate the scope
906 vStringCopy(saveScope, token->scope);
907 addToScope (token, parent->string);
908 parseLine (token, is_class);
909 vStringCopy(token->scope, saveScope);
911 else if (isKeyword (token, KEYWORD_function))
913 vStringCopy(saveScope, token->scope);
914 addToScope (token, parent->string);
915 parseFunction (token);
916 vStringCopy(token->scope, saveScope);
918 else if (isType (token, TOKEN_OPEN_CURLY))
920 /* Handle nested blocks */
921 parseBlock (token, parent);
923 else
926 * It is possible for a line to have no terminator
927 * if the following line is a closing brace.
928 * parseLine will detect this case and indicate
929 * whether we should read an additional token.
931 read_next_token = parseLine (token, is_class);
935 * Always read a new token unless we find a statement without
936 * a ending terminator
938 if( read_next_token )
939 readToken(token);
942 * If we find a statement without a terminator consider the
943 * block finished, otherwise the stack will be off by one.
945 } while (! isType (token, TOKEN_CLOSE_CURLY) && read_next_token );
948 vStringDelete(saveScope);
949 token->nestLevel--;
951 return is_class;
954 static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
956 tokenInfo *const name = newToken ();
957 boolean has_methods = FALSE;
960 * This deals with these formats
961 * validProperty : 2,
962 * validMethod : function(a,b) {}
963 * 'validMethod2' : function(a,b) {}
964 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
969 readToken (token);
970 if (isType (token, TOKEN_CLOSE_CURLY))
973 * This was most likely a variable declaration of a hash table.
974 * indicate there were no methods and return.
976 has_methods = FALSE;
977 goto cleanUp;
980 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
982 copyToken(name, token);
984 readToken (token);
985 if ( isType (token, TOKEN_COLON) )
987 readToken (token);
988 if ( isKeyword (token, KEYWORD_function) )
990 readToken (token);
991 if ( isType (token, TOKEN_OPEN_PAREN) )
993 skipArgumentList(token);
996 if (isType (token, TOKEN_OPEN_CURLY))
998 has_methods = TRUE;
999 addToScope (name, class->string);
1000 makeJsTag (name, JSTAG_METHOD);
1001 parseBlock (token, name);
1004 * Read to the closing curly, check next
1005 * token, if a comma, we must loop again
1007 readToken (token);
1010 else
1012 has_methods = TRUE;
1013 addToScope (name, class->string);
1014 makeJsTag (name, JSTAG_PROPERTY);
1017 * Read the next token, if a comma
1018 * we must loop again
1020 readToken (token);
1024 } while ( isType(token, TOKEN_COMMA) );
1026 findCmdTerm (token);
1028 cleanUp:
1029 deleteToken (name);
1031 return has_methods;
1034 static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
1036 tokenInfo *const name = newToken ();
1037 tokenInfo *const secondary_name = newToken ();
1038 tokenInfo *const method_body_token = newToken ();
1039 vString * saveScope = vStringNew ();
1040 boolean is_class = FALSE;
1041 boolean is_terminated = TRUE;
1042 boolean is_global = FALSE;
1043 boolean has_methods = FALSE;
1044 vString * fulltag;
1046 vStringClear(saveScope);
1048 * Functions can be named or unnamed.
1049 * This deals with these formats:
1050 * Function
1051 * validFunctionOne = function(a,b) {}
1052 * testlib.validFunctionFive = function(a,b) {}
1053 * var innerThree = function(a,b) {}
1054 * var innerFour = (a,b) {}
1055 * var D2 = secondary_fcn_name(a,b) {}
1056 * var D3 = new Function("a", "b", "return a+b;");
1057 * Class
1058 * testlib.extras.ValidClassOne = function(a,b) {
1059 * this.a = a;
1061 * Class Methods
1062 * testlib.extras.ValidClassOne.prototype = {
1063 * 'validMethodOne' : function(a,b) {},
1064 * 'validMethodTwo' : function(a,b) {}
1066 * ValidClassTwo = function ()
1068 * this.validMethodThree = function() {}
1069 * // unnamed method
1070 * this.validMethodFour = () {}
1072 * Database.prototype.validMethodThree = Database_getTodaysDate;
1075 if ( is_inside_class )
1076 is_class = TRUE;
1078 * var can preceed an inner function
1080 if ( isKeyword(token, KEYWORD_var) )
1083 * Only create variables for global scope
1085 if ( token->nestLevel == 0 )
1087 is_global = TRUE;
1089 readToken(token);
1092 if ( isKeyword(token, KEYWORD_this) )
1094 readToken(token);
1095 if (isType (token, TOKEN_PERIOD))
1097 readToken(token);
1101 copyToken(name, token);
1103 while (! isType (token, TOKEN_CLOSE_CURLY) &&
1104 ! isType (token, TOKEN_SEMICOLON) &&
1105 ! isType (token, TOKEN_EQUAL_SIGN) )
1107 /* Potentially the name of the function */
1108 readToken (token);
1109 if (isType (token, TOKEN_PERIOD))
1112 * Cannot be a global variable is it has dot references in the name
1114 is_global = FALSE;
1117 readToken (token);
1118 if ( isKeyword(token, KEYWORD_NONE) )
1120 if ( is_class )
1122 vStringCopy(saveScope, token->scope);
1123 addToScope(token, name->string);
1125 else
1126 addContext (name, token);
1128 else if ( isKeyword(token, KEYWORD_prototype) )
1131 * When we reach the "prototype" tag, we infer:
1132 * "BindAgent" is a class
1133 * "build" is a method
1135 * function BindAgent( repeatableIdName, newParentIdName ) {
1138 * CASE 1
1139 * Specified function name: "build"
1140 * BindAgent.prototype.build = function( mode ) {
1141 * maybe parse nested functions
1144 * CASE 2
1145 * Prototype listing
1146 * ValidClassOne.prototype = {
1147 * 'validMethodOne' : function(a,b) {},
1148 * 'validMethodTwo' : function(a,b) {}
1152 makeClassTag (name);
1153 is_class = TRUE;
1156 * There should a ".function_name" next.
1158 readToken (token);
1159 if (isType (token, TOKEN_PERIOD))
1162 * Handle CASE 1
1164 readToken (token);
1165 if ( isKeyword(token, KEYWORD_NONE) )
1167 vStringCopy(saveScope, token->scope);
1168 addToScope(token, name->string);
1169 makeJsTag (token, JSTAG_METHOD);
1171 readToken (method_body_token);
1173 while (! ( isType (method_body_token, TOKEN_SEMICOLON) ||
1174 isType (method_body_token, TOKEN_CLOSE_CURLY) ||
1175 isType (method_body_token, TOKEN_OPEN_CURLY)) )
1177 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1178 skipArgumentList(method_body_token);
1179 else
1180 readToken (method_body_token);
1183 if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1184 parseBlock (method_body_token, token);
1186 is_terminated = TRUE;
1187 goto cleanUp;
1190 else if (isType (token, TOKEN_EQUAL_SIGN))
1192 readToken (token);
1193 if (isType (token, TOKEN_OPEN_CURLY))
1196 * Handle CASE 2
1198 * Creates tags for each of these class methods
1199 * ValidClassOne.prototype = {
1200 * 'validMethodOne' : function(a,b) {},
1201 * 'validMethodTwo' : function(a,b) {}
1204 parseMethods(token, name);
1206 * Find to the end of the statement
1208 findCmdTerm (token);
1209 token->ignoreTag = FALSE;
1210 is_terminated = TRUE;
1211 goto cleanUp;
1215 readToken (token);
1216 } while (isType (token, TOKEN_PERIOD));
1219 if ( isType (token, TOKEN_OPEN_PAREN) )
1220 skipArgumentList(token);
1222 if ( isType (token, TOKEN_OPEN_SQUARE) )
1223 skipArrayList(token);
1226 if ( isType (token, TOKEN_OPEN_CURLY) )
1228 is_class = parseBlock (token, name);
1233 if ( isType (token, TOKEN_CLOSE_CURLY) )
1236 * Reaching this section without having
1237 * processed an open curly brace indicates
1238 * the statement is most likely not terminated.
1240 is_terminated = FALSE;
1241 goto cleanUp;
1244 if ( isType (token, TOKEN_SEMICOLON) )
1247 * Only create variables for global scope
1249 if ( token->nestLevel == 0 && is_global )
1252 * Handles this syntax:
1253 * var g_var2;
1255 if (isType (token, TOKEN_SEMICOLON))
1256 makeJsTag (name, JSTAG_VARIABLE);
1259 * Statement has ended.
1260 * This deals with calls to functions, like:
1261 * alert(..);
1263 goto cleanUp;
1266 if ( isType (token, TOKEN_EQUAL_SIGN) )
1268 readToken (token);
1270 if ( isKeyword (token, KEYWORD_function) )
1272 readToken (token);
1274 if ( isKeyword (token, KEYWORD_NONE) &&
1275 ! isType (token, TOKEN_OPEN_PAREN) )
1278 * Functions of this format:
1279 * var D2A = function theAdd(a, b)
1281 * return a+b;
1283 * Are really two separate defined functions and
1284 * can be referenced in two ways:
1285 * alert( D2A(1,2) ); // produces 3
1286 * alert( theAdd(1,2) ); // also produces 3
1287 * So it must have two tags:
1288 * D2A
1289 * theAdd
1290 * Save the reference to the name for later use, once
1291 * we have established this is a valid function we will
1292 * create the secondary reference to it.
1294 copyToken(secondary_name, token);
1295 readToken (token);
1298 if ( isType (token, TOKEN_OPEN_PAREN) )
1299 skipArgumentList(token);
1301 if (isType (token, TOKEN_OPEN_CURLY))
1304 * This will be either a function or a class.
1305 * We can only determine this by checking the body
1306 * of the function. If we find a "this." we know
1307 * it is a class, otherwise it is a function.
1309 if ( is_inside_class )
1311 makeJsTag (name, JSTAG_METHOD);
1312 if ( vStringLength(secondary_name->string) > 0 )
1313 makeFunctionTag (secondary_name);
1314 parseBlock (token, name);
1316 else
1318 is_class = parseBlock (token, name);
1319 if ( is_class )
1320 makeClassTag (name);
1321 else
1322 makeFunctionTag (name);
1324 if ( vStringLength(secondary_name->string) > 0 )
1325 makeFunctionTag (secondary_name);
1328 * Find to the end of the statement
1330 goto cleanUp;
1334 else if (isType (token, TOKEN_OPEN_PAREN))
1337 * Handle nameless functions
1338 * this.method_name = () {}
1340 skipArgumentList(token);
1342 if (isType (token, TOKEN_OPEN_CURLY))
1345 * Nameless functions are only setup as methods.
1347 makeJsTag (name, JSTAG_METHOD);
1348 parseBlock (token, name);
1351 else if (isType (token, TOKEN_OPEN_CURLY))
1354 * Creates tags for each of these class methods
1355 * ValidClassOne.prototype = {
1356 * 'validMethodOne' : function(a,b) {},
1357 * 'validMethodTwo' : function(a,b) {}
1359 * Or checks if this is a hash variable.
1360 * var z = {};
1362 has_methods = parseMethods(token, name);
1363 if ( ! has_methods )
1366 * Only create variables for global scope
1368 if ( token->nestLevel == 0 && is_global )
1371 * A pointer can be created to the function.
1372 * If we recognize the function/class name ignore the variable.
1373 * This format looks identical to a variable definition.
1374 * A variable defined outside of a block is considered
1375 * a global variable:
1376 * var g_var1 = 1;
1377 * var g_var2;
1378 * This is not a global variable:
1379 * var g_var = function;
1380 * This is a global variable:
1381 * var g_var = different_var_name;
1383 fulltag = vStringNew ();
1384 if (vStringLength (token->scope) > 0)
1386 vStringCopy(fulltag, token->scope);
1387 vStringCatS (fulltag, ".");
1388 vStringCatS (fulltag, vStringValue(token->string));
1390 else
1392 vStringCopy(fulltag, token->string);
1394 vStringTerminate(fulltag);
1395 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1396 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1398 readToken (token);
1399 if ( ! isType (token, TOKEN_SEMICOLON))
1400 findCmdTerm (token);
1401 if (isType (token, TOKEN_SEMICOLON))
1402 makeJsTag (name, JSTAG_VARIABLE);
1404 vStringDelete (fulltag);
1407 if (isType (token, TOKEN_CLOSE_CURLY))
1410 * Assume the closing parantheses terminates
1411 * this statements.
1413 is_terminated = TRUE;
1416 else if (isKeyword (token, KEYWORD_new))
1418 readToken (token);
1419 if ( isKeyword (token, KEYWORD_function) ||
1420 isKeyword (token, KEYWORD_capital_function) ||
1421 isKeyword (token, KEYWORD_object) ||
1422 isKeyword (token, KEYWORD_capital_object) )
1424 if ( isKeyword (token, KEYWORD_object) ||
1425 isKeyword (token, KEYWORD_capital_object) )
1426 is_class = TRUE;
1428 readToken (token);
1429 if ( isType (token, TOKEN_OPEN_PAREN) )
1430 skipArgumentList(token);
1432 if (isType (token, TOKEN_SEMICOLON))
1434 if ( token->nestLevel == 0 )
1436 if ( is_class )
1438 makeClassTag (name);
1439 } else {
1440 makeFunctionTag (name);
1446 else if (isKeyword (token, KEYWORD_NONE))
1449 * Only create variables for global scope
1451 if ( token->nestLevel == 0 && is_global )
1454 * A pointer can be created to the function.
1455 * If we recognize the function/class name ignore the variable.
1456 * This format looks identical to a variable definition.
1457 * A variable defined outside of a block is considered
1458 * a global variable:
1459 * var g_var1 = 1;
1460 * var g_var2;
1461 * This is not a global variable:
1462 * var g_var = function;
1463 * This is a global variable:
1464 * var g_var = different_var_name;
1466 fulltag = vStringNew ();
1467 if (vStringLength (token->scope) > 0)
1469 vStringCopy(fulltag, token->scope);
1470 vStringCatS (fulltag, ".");
1471 vStringCatS (fulltag, vStringValue(token->string));
1473 else
1475 vStringCopy(fulltag, token->string);
1477 vStringTerminate(fulltag);
1478 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1479 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1481 findCmdTerm (token);
1482 if (isType (token, TOKEN_SEMICOLON))
1483 makeJsTag (name, JSTAG_VARIABLE);
1485 vStringDelete (fulltag);
1489 findCmdTerm (token);
1492 * Statements can be optionally terminated in the case of
1493 * statement prior to a close curly brace as in the
1494 * document.write line below:
1496 * function checkForUpdate() {
1497 * if( 1==1 ) {
1498 * document.write("hello from checkForUpdate<br>")
1500 * return 1;
1503 if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY))
1504 is_terminated = FALSE;
1507 cleanUp:
1508 vStringCopy(token->scope, saveScope);
1509 deleteToken (name);
1510 deleteToken (secondary_name);
1511 deleteToken (method_body_token);
1512 vStringDelete(saveScope);
1514 return is_terminated;
1517 static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
1519 boolean is_terminated = TRUE;
1521 * Detect the common statements, if, while, for, do, ...
1522 * This is necessary since the last statement within a block "{}"
1523 * can be optionally terminated.
1525 * If the statement is not terminated, we need to tell
1526 * the calling routine to prevent reading an additional token
1527 * looking for the end of the statement.
1530 if (isType(token, TOKEN_KEYWORD))
1532 switch (token->keyword)
1534 case KEYWORD_for:
1535 case KEYWORD_while:
1536 case KEYWORD_do:
1537 parseLoop (token);
1538 break;
1539 case KEYWORD_if:
1540 case KEYWORD_else:
1541 case KEYWORD_try:
1542 case KEYWORD_catch:
1543 case KEYWORD_finally:
1544 /* Common semantics */
1545 is_terminated = parseIf (token);
1546 break;
1547 case KEYWORD_switch:
1548 parseSwitch (token);
1549 break;
1550 default:
1551 parseStatement (token, is_inside_class);
1552 break;
1555 else
1558 * Special case where single line statements may not be
1559 * SEMICOLON terminated. parseBlock needs to know this
1560 * so that it does not read the next token.
1562 is_terminated = parseStatement (token, is_inside_class);
1564 return is_terminated;
1567 static void parseJsFile (tokenInfo *const token)
1571 readToken (token);
1573 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
1574 parseFunction (token);
1575 else
1576 parseLine (token, FALSE);
1577 } while (TRUE);
1580 static void initialize (const langType language)
1582 Assert (sizeof (JsKinds) / sizeof (JsKinds [0]) == JSTAG_COUNT);
1583 Lang_js = language;
1584 buildJsKeywordHash ();
1587 static void findJsTags (void)
1589 tokenInfo *const token = newToken ();
1590 exception_t exception;
1592 ClassNames = stringListNew ();
1593 FunctionNames = stringListNew ();
1595 exception = (exception_t) (setjmp (Exception));
1596 while (exception == ExceptionNone)
1597 parseJsFile (token);
1599 stringListDelete (ClassNames);
1600 stringListDelete (FunctionNames);
1601 ClassNames = NULL;
1602 FunctionNames = NULL;
1603 deleteToken (token);
1606 /* Create parser definition stucture */
1607 extern parserDefinition* JavaScriptParser (void)
1609 static const char *const extensions [] = { "js", NULL };
1610 parserDefinition *const def = parserNew ("JavaScript");
1611 def->extensions = extensions;
1613 * New definitions for parsing instead of regex
1615 def->kinds = JsKinds;
1616 def->kindCount = KIND_COUNT (JsKinds);
1617 def->parser = findJsTags;
1618 def->initialize = initialize;
1620 return def;
1622 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */