javascript: Fix handling some class-related unterminated statements
[geany-mirror.git] / tagmanager / ctags / js.c
blob99ffcc7b35851df448b4540c33aca88760cbf5f2
/*
 *	 Copyright (c) 2003, Darren Hiebert
 *
 *	 This source code is released for free distribution under the terms of the
 *	 GNU General Public License.
 *
 *	 This module contains functions for generating tags for JavaScript language
 *	 files.
 *
 *	 This is a good reference for different forms of the function statement:
 *		 http://www.permadi.com/tutorial/jsFunc/
 *	 Another good reference:
 *		 http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
 */
/*
 *	 INCLUDE FILES
 */
19 #include "general.h" /* must always come first */
20 #include <ctype.h> /* to define isalpha () */
21 #include <string.h>
22 #include <setjmp.h>
23 #include <mio/mio.h>
24 #ifdef DEBUG
25 #include <stdio.h>
26 #endif
28 #include "keyword.h"
29 #include "parse.h"
30 #include "read.h"
31 #include "main.h"
32 #include "vstring.h"
/*
 *	 MACROS
 */
/* Evaluates to TRUE when the type field of the pointed-to token equals t. */
#define isType(token,t)		((boolean) ((token)->type == (t)))

/* Evaluates to TRUE when the keyword field of the pointed-to token equals k. */
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))
/*
 *	 DATA DECLARATIONS
 */
/* Codes handed to longjmp() when the tokenizer has to abandon parsing. */
typedef enum eException {
	ExceptionNone,
	ExceptionEOF	/* the input ran out while reading a token */
} exception_t;
47 * Tracks class and function names already created
49 static stringList *ClassNames;
50 static stringList *FunctionNames;
/*
 * Identifies which keyword a token is.  KEYWORD_NONE marks a token that is
 * not a keyword; the other values start at 0 and follow in sequence.
 */
typedef enum eKeywordId {
	KEYWORD_NONE = -1,

	/* function and object constructors */
	KEYWORD_function,
	KEYWORD_capital_function,
	KEYWORD_capital_object,
	KEYWORD_prototype,

	/* declarations and references */
	KEYWORD_var,
	KEYWORD_new,
	KEYWORD_this,

	/* control flow */
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally,

	KEYWORD_sap,
	KEYWORD_return
} keywordId;
76 /* Used to determine whether keyword is valid for the token language and
77 * what its ID is.
79 typedef struct sKeywordDesc {
80 const char *name;
81 keywordId id;
82 } keywordDesc;
/* Lexical category assigned to a token by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,		/* nothing recognised (also the initial state) */
	TOKEN_CHARACTER,		/* backslash sequence */
	TOKEN_CLOSE_PAREN,		/* ) */
	TOKEN_SEMICOLON,		/* ; */
	TOKEN_COLON,			/* : */
	TOKEN_COMMA,			/* , */
	TOKEN_KEYWORD,			/* identifier found in the keyword table */
	TOKEN_OPEN_PAREN,		/* ( */
	TOKEN_OPERATOR,
	TOKEN_IDENTIFIER,		/* identifier that is not a keyword */
	TOKEN_STRING,			/* quoted string literal */
	TOKEN_PERIOD,			/* . */
	TOKEN_OPEN_CURLY,		/* { */
	TOKEN_CLOSE_CURLY,		/* } */
	TOKEN_EQUAL_SIGN,		/* = */
	TOKEN_FORWARD_SLASH,	/* / used as an operator */
	TOKEN_OPEN_SQUARE,		/* [ */
	TOKEN_CLOSE_SQUARE,		/* ] */
	TOKEN_REGEXP			/* regular expression literal */
} tokenType;
106 typedef struct sTokenInfo {
107 tokenType type;
108 keywordId keyword;
109 vString * string;
110 vString * scope;
111 unsigned long lineNumber;
112 MIOPos filePosition;
113 int nestLevel;
114 boolean ignoreTag;
115 } tokenInfo;
/*
 *	 DATA DEFINITIONS
 */
121 static tokenType LastTokenType;
123 static langType Lang_js;
125 static jmp_buf Exception;
/* Index into JsKinds for each kind of tag this parser can emit. */
typedef enum {
	JSTAG_FUNCTION,
	JSTAG_CLASS,
	JSTAG_METHOD,
	JSTAG_PROPERTY,
	JSTAG_VARIABLE,
	JSTAG_COUNT		/* number of kinds above; not a kind itself */
} jsKind;
136 static kindOption JsKinds [] = {
137 { TRUE, 'f', "function", "functions" },
138 { TRUE, 'c', "class", "classes" },
139 { TRUE, 'm', "method", "methods" },
140 { TRUE, 'p', "member", "properties" },
141 { TRUE, 'v', "variable", "global variables" }
144 static const keywordDesc JsKeywordTable [] = {
145 /* keyword keyword ID */
146 { "function", KEYWORD_function },
147 { "Function", KEYWORD_capital_function },
148 { "Object", KEYWORD_capital_object },
149 { "prototype", KEYWORD_prototype },
150 { "var", KEYWORD_var },
151 { "new", KEYWORD_new },
152 { "this", KEYWORD_this },
153 { "for", KEYWORD_for },
154 { "while", KEYWORD_while },
155 { "do", KEYWORD_do },
156 { "if", KEYWORD_if },
157 { "else", KEYWORD_else },
158 { "switch", KEYWORD_switch },
159 { "try", KEYWORD_try },
160 { "catch", KEYWORD_catch },
161 { "finally", KEYWORD_finally },
162 { "sap", KEYWORD_sap },
163 { "return", KEYWORD_return }
/*
 *	 FUNCTION DEFINITIONS
 */
170 /* Recursive functions */
171 static void parseFunction (tokenInfo *const token);
172 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent);
173 static boolean parseLine (tokenInfo *const token, boolean is_inside_class);
174 static void parseUI5 (tokenInfo *const token);
176 static boolean isIdentChar (const int c)
178 return (boolean)
179 (isalpha (c) || isdigit (c) || c == '$' ||
180 c == '@' || c == '_' || c == '#');
183 static void buildJsKeywordHash (void)
185 const size_t count = sizeof (JsKeywordTable) /
186 sizeof (JsKeywordTable [0]);
187 size_t i;
188 for (i = 0 ; i < count ; ++i)
190 const keywordDesc* const p = &JsKeywordTable [i];
191 addKeyword (p->name, Lang_js, (int) p->id);
195 static tokenInfo *newToken (void)
197 tokenInfo *const token = xMalloc (1, tokenInfo);
199 token->type = TOKEN_UNDEFINED;
200 token->keyword = KEYWORD_NONE;
201 token->string = vStringNew ();
202 token->scope = vStringNew ();
203 token->nestLevel = 0;
204 token->ignoreTag = FALSE;
205 token->lineNumber = getSourceLineNumber ();
206 token->filePosition = getInputFilePosition ();
208 return token;
211 static void deleteToken (tokenInfo *const token)
213 vStringDelete (token->string);
214 vStringDelete (token->scope);
215 eFree (token);
/*
 *	 Tag generation functions
 */
222 static void makeJsTag (tokenInfo *const token, const jsKind kind)
224 if (JsKinds [kind].enabled && ! token->ignoreTag )
226 const char *name = vStringValue (token->string);
227 vString *fullscope = vStringNewCopy (token->scope);
228 const char *p;
229 tagEntryInfo e;
231 if ((p = strrchr (name, '.')) != NULL)
233 if (vStringLength (fullscope) > 0)
234 vStringPut (fullscope, '.');
235 vStringNCatS (fullscope, name, p - name);
236 name = p + 1;
239 initTagEntry (&e, name);
241 e.lineNumber = token->lineNumber;
242 e.filePosition = token->filePosition;
243 e.kindName = JsKinds [kind].name;
244 e.kind = JsKinds [kind].letter;
246 if ( vStringLength(fullscope) > 0 )
248 jsKind parent_kind = JSTAG_CLASS;
250 /* if we're creating a function (and not a method),
251 * guess we're inside another function */
252 if (kind == JSTAG_FUNCTION)
253 parent_kind = JSTAG_FUNCTION;
255 e.extensionFields.scope[0] = JsKinds [parent_kind].name;
256 e.extensionFields.scope[1] = vStringValue (fullscope);
259 makeTagEntry (&e);
261 vStringDelete (fullscope);
265 static void makeClassTag (tokenInfo *const token)
267 vString * fulltag;
269 if ( ! token->ignoreTag )
271 fulltag = vStringNew ();
272 if (vStringLength (token->scope) > 0)
274 vStringCopy(fulltag, token->scope);
275 vStringCatS (fulltag, ".");
276 vStringCatS (fulltag, vStringValue(token->string));
278 else
280 vStringCopy(fulltag, token->string);
282 vStringTerminate(fulltag);
283 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
285 stringListAdd (ClassNames, vStringNewCopy (fulltag));
286 makeJsTag (token, JSTAG_CLASS);
288 vStringDelete (fulltag);
292 static void makeFunctionTag (tokenInfo *const token)
294 vString * fulltag;
296 if ( ! token->ignoreTag )
298 fulltag = vStringNew ();
299 if (vStringLength (token->scope) > 0)
301 vStringCopy(fulltag, token->scope);
302 vStringCatS (fulltag, ".");
303 vStringCatS (fulltag, vStringValue(token->string));
305 else
307 vStringCopy(fulltag, token->string);
309 vStringTerminate(fulltag);
310 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
312 stringListAdd (FunctionNames, vStringNewCopy (fulltag));
313 makeJsTag (token, JSTAG_FUNCTION);
315 vStringDelete (fulltag);
/*
 *	 Parsing functions
 */
/*
 * Consumes input up to and including the next occurrence of c.
 * Returns c, or EOF when the input ends first.
 */
static int skipToCharacter (const int c)
{
	int d = fileGetc ();

	while (d != EOF && d != c)
		d = fileGetc ();

	return d;
}
333 static void parseString (vString *const string, const int delimiter)
335 boolean end = FALSE;
336 while (! end)
338 int c = fileGetc ();
339 if (c == EOF)
340 end = TRUE;
341 else if (c == '\\')
343 c = fileGetc(); /* This maybe a ' or ". */
344 vStringPut(string, c);
346 else if (c == delimiter)
347 end = TRUE;
348 else
349 vStringPut (string, c);
351 vStringTerminate (string);
354 static void parseRegExp (void)
356 int c;
357 boolean in_range = FALSE;
361 c = fileGetc ();
362 if (! in_range && c == '/')
364 do /* skip flags */
366 c = fileGetc ();
367 } while (isalpha (c));
368 fileUngetc (c);
369 break;
371 else if (c == '\\')
372 c = fileGetc (); /* skip next character */
373 else if (c == '[')
374 in_range = TRUE;
375 else if (c == ']')
376 in_range = FALSE;
377 } while (c != EOF);
380 /* Read a C identifier beginning with "firstChar" and places it into
381 * "name".
383 static void parseIdentifier (vString *const string, const int firstChar)
385 int c = firstChar;
386 Assert (isIdentChar (c));
389 vStringPut (string, c);
390 c = fileGetc ();
391 } while (isIdentChar (c));
392 vStringTerminate (string);
393 if (!isspace (c))
394 fileUngetc (c); /* unget non-identifier character */
397 static keywordId analyzeToken (vString *const name)
399 vString *keyword = vStringNew ();
400 keywordId result;
401 vStringCopyToLower (keyword, name);
402 result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
403 vStringDelete (keyword);
404 return result;
407 static void readToken (tokenInfo *const token)
409 int c;
411 token->type = TOKEN_UNDEFINED;
412 token->keyword = KEYWORD_NONE;
413 vStringClear (token->string);
415 getNextChar:
418 c = fileGetc ();
420 while (c == '\t' || c == ' ' || c == '\n');
422 token->lineNumber = getSourceLineNumber ();
423 token->filePosition = getInputFilePosition ();
425 switch (c)
427 case EOF: longjmp (Exception, (int)ExceptionEOF); break;
428 case '(': token->type = TOKEN_OPEN_PAREN; break;
429 case ')': token->type = TOKEN_CLOSE_PAREN; break;
430 case ';': token->type = TOKEN_SEMICOLON; break;
431 case ',': token->type = TOKEN_COMMA; break;
432 case '.': token->type = TOKEN_PERIOD; break;
433 case ':': token->type = TOKEN_COLON; break;
434 case '{': token->type = TOKEN_OPEN_CURLY; break;
435 case '}': token->type = TOKEN_CLOSE_CURLY; break;
436 case '=': token->type = TOKEN_EQUAL_SIGN; break;
437 case '[': token->type = TOKEN_OPEN_SQUARE; break;
438 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
440 case '\'':
441 case '"':
442 token->type = TOKEN_STRING;
443 parseString (token->string, c);
444 token->lineNumber = getSourceLineNumber ();
445 token->filePosition = getInputFilePosition ();
446 break;
448 case '\\':
449 c = fileGetc ();
450 if (c != '\\' && c != '"' && !isspace (c))
451 fileUngetc (c);
452 token->type = TOKEN_CHARACTER;
453 token->lineNumber = getSourceLineNumber ();
454 token->filePosition = getInputFilePosition ();
455 break;
457 case '/':
459 int d = fileGetc ();
460 if ( (d != '*') && /* is this the start of a comment? */
461 (d != '/') ) /* is a one line comment? */
463 fileUngetc (d);
464 switch (LastTokenType)
466 case TOKEN_CHARACTER:
467 case TOKEN_IDENTIFIER:
468 case TOKEN_STRING:
469 case TOKEN_CLOSE_CURLY:
470 case TOKEN_CLOSE_PAREN:
471 case TOKEN_CLOSE_SQUARE:
472 token->type = TOKEN_FORWARD_SLASH;
473 break;
475 default:
476 token->type = TOKEN_REGEXP;
477 parseRegExp ();
478 token->lineNumber = getSourceLineNumber ();
479 token->filePosition = getInputFilePosition ();
480 break;
483 else
485 if (d == '*')
489 skipToCharacter ('*');
490 c = fileGetc ();
491 if (c == '/')
492 break;
493 else
494 fileUngetc (c);
495 } while (c != EOF && c != '\0');
496 goto getNextChar;
498 else if (d == '/') /* is this the start of a comment? */
500 skipToCharacter ('\n');
501 goto getNextChar;
504 break;
507 case '#':
508 /* skip shebang in case of e.g. Node.js scripts */
509 if (token->lineNumber > 1)
510 token->type = TOKEN_UNDEFINED;
511 else if ((c = fileGetc ()) != '!')
513 fileUngetc (c);
514 token->type = TOKEN_UNDEFINED;
516 else
518 skipToCharacter ('\n');
519 goto getNextChar;
521 break;
523 default:
524 if (! isIdentChar (c))
525 token->type = TOKEN_UNDEFINED;
526 else
528 parseIdentifier (token->string, c);
529 token->lineNumber = getSourceLineNumber ();
530 token->filePosition = getInputFilePosition ();
531 token->keyword = analyzeToken (token->string);
532 if (isKeyword (token, KEYWORD_NONE))
533 token->type = TOKEN_IDENTIFIER;
534 else
535 token->type = TOKEN_KEYWORD;
537 break;
540 LastTokenType = token->type;
543 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
545 dest->nestLevel = src->nestLevel;
546 dest->lineNumber = src->lineNumber;
547 dest->filePosition = src->filePosition;
548 dest->type = src->type;
549 dest->keyword = src->keyword;
550 vStringCopy(dest->string, src->string);
551 vStringCopy(dest->scope, src->scope);
/*
 *	 Token parsing functions
 */
558 static void skipArgumentList (tokenInfo *const token)
560 int nest_level = 0;
563 * Other databases can have arguments with fully declared
564 * datatypes:
565 * ( name varchar(30), text binary(10) )
566 * So we must check for nested open and closing parantheses
569 if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */
571 nest_level++;
572 while (! (isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0)))
574 readToken (token);
575 if (isType (token, TOKEN_OPEN_PAREN))
577 nest_level++;
579 if (isType (token, TOKEN_CLOSE_PAREN))
581 if (nest_level > 0)
583 nest_level--;
587 readToken (token);
591 static void skipArrayList (tokenInfo *const token)
593 int nest_level = 0;
596 * Handle square brackets
597 * var name[1]
598 * So we must check for nested open and closing square brackets
601 if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */
603 nest_level++;
604 while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0)))
606 readToken (token);
607 if (isType (token, TOKEN_OPEN_SQUARE))
609 nest_level++;
611 if (isType (token, TOKEN_CLOSE_SQUARE))
613 if (nest_level > 0)
615 nest_level--;
619 readToken (token);
623 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
625 if (vStringLength (parent->string) > 0)
627 vStringCatS (parent->string, ".");
629 vStringCatS (parent->string, vStringValue(child->string));
630 vStringTerminate(parent->string);
633 static void addToScope (tokenInfo* const token, vString* const extra)
635 if (vStringLength (token->scope) > 0)
637 vStringCatS (token->scope, ".");
639 vStringCatS (token->scope, vStringValue(extra));
640 vStringTerminate(token->scope);
/*
 *	 Scanning functions
 */
647 static void findCmdTerm (tokenInfo *const token)
650 * Read until we find either a semicolon or closing brace.
651 * Any nested braces will be handled within.
653 while (! ( isType (token, TOKEN_SEMICOLON) ||
654 isType (token, TOKEN_CLOSE_CURLY) ) )
656 /* Handle nested blocks */
657 if ( isType (token, TOKEN_OPEN_CURLY))
659 parseBlock (token, token);
660 readToken (token);
662 else if ( isType (token, TOKEN_OPEN_PAREN) )
664 skipArgumentList(token);
666 else if ( isType (token, TOKEN_OPEN_SQUARE) )
668 skipArrayList(token);
670 else
672 readToken (token);
677 static void parseSwitch (tokenInfo *const token)
680 * switch (expression) {
681 * case value1:
682 * statement;
683 * break;
684 * case value2:
685 * statement;
686 * break;
687 * default : statement;
691 readToken (token);
693 if (isType (token, TOKEN_OPEN_PAREN))
696 * Handle nameless functions, these will only
697 * be considered methods.
699 skipArgumentList(token);
702 if (isType (token, TOKEN_OPEN_CURLY))
704 parseBlock (token, token);
708 static boolean parseLoop (tokenInfo *const token)
711 * Handles these statements
712 * for (x=0; x<3; x++)
713 * document.write("This text is repeated three times<br>");
715 * for (x=0; x<3; x++)
717 * document.write("This text is repeated three times<br>");
720 * while (number<5){
721 * document.write(number+"<br>");
722 * number++;
725 * do{
726 * document.write(number+"<br>");
727 * number++;
729 * while (number<5);
731 boolean is_terminated = TRUE;
733 if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
735 readToken(token);
737 if (isType (token, TOKEN_OPEN_PAREN))
740 * Handle nameless functions, these will only
741 * be considered methods.
743 skipArgumentList(token);
746 if (isType (token, TOKEN_OPEN_CURLY))
749 * This will be either a function or a class.
750 * We can only determine this by checking the body
751 * of the function. If we find a "this." we know
752 * it is a class, otherwise it is a function.
754 parseBlock (token, token);
756 else
758 is_terminated = parseLine(token, FALSE);
761 else if (isKeyword (token, KEYWORD_do))
763 readToken(token);
765 if (isType (token, TOKEN_OPEN_CURLY))
768 * This will be either a function or a class.
769 * We can only determine this by checking the body
770 * of the function. If we find a "this." we know
771 * it is a class, otherwise it is a function.
773 parseBlock (token, token);
775 else
777 is_terminated = parseLine(token, FALSE);
780 if (is_terminated)
781 readToken(token);
783 if (isKeyword (token, KEYWORD_while))
785 readToken(token);
787 if (isType (token, TOKEN_OPEN_PAREN))
790 * Handle nameless functions, these will only
791 * be considered methods.
793 skipArgumentList(token);
795 if (! isType (token, TOKEN_SEMICOLON))
796 is_terminated = FALSE;
800 return is_terminated;
803 static boolean parseIf (tokenInfo *const token)
805 boolean read_next_token = TRUE;
807 * If statements have two forms
808 * if ( ... )
809 * one line;
811 * if ( ... )
812 * statement;
813 * else
814 * statement
816 * if ( ... ) {
817 * multiple;
818 * statements;
822 * if ( ... ) {
823 * return elem
826 * This example if correctly written, but the
827 * else contains only 1 statement without a terminator
828 * since the function finishes with the closing brace.
830 * function a(flag){
831 * if(flag)
832 * test(1);
833 * else
834 * test(2)
837 * TODO: Deal with statements that can optional end
838 * without a semi-colon. Currently this messes up
839 * the parsing of blocks.
840 * Need to somehow detect this has happened, and either
841 * backup a token, or skip reading the next token if
842 * that is possible from all code locations.
846 readToken (token);
848 if (isKeyword (token, KEYWORD_if))
851 * Check for an "else if" and consume the "if"
853 readToken (token);
856 if (isType (token, TOKEN_OPEN_PAREN))
859 * Handle nameless functions, these will only
860 * be considered methods.
862 skipArgumentList(token);
865 if (isType (token, TOKEN_OPEN_CURLY))
868 * This will be either a function or a class.
869 * We can only determine this by checking the body
870 * of the function. If we find a "this." we know
871 * it is a class, otherwise it is a function.
873 parseBlock (token, token);
875 else
877 findCmdTerm (token);
879 /* The next token should only be read if this statement had its own
880 * terminator */
881 read_next_token = isType (token, TOKEN_SEMICOLON);
883 return read_next_token;
886 static void parseFunction (tokenInfo *const token)
888 tokenInfo *const name = newToken ();
889 boolean is_class = FALSE;
892 * This deals with these formats
893 * function validFunctionTwo(a,b) {}
896 readToken (name);
897 /* Add scope in case this is an INNER function */
898 addToScope(name, token->scope);
900 readToken (token);
901 while (isType (token, TOKEN_PERIOD))
903 readToken (token);
904 if ( isKeyword(token, KEYWORD_NONE) )
906 addContext (name, token);
907 readToken (token);
911 if ( isType (token, TOKEN_OPEN_PAREN) )
912 skipArgumentList(token);
914 if ( isType (token, TOKEN_OPEN_CURLY) )
916 is_class = parseBlock (token, name);
917 if ( is_class )
918 makeClassTag (name);
919 else
920 makeFunctionTag (name);
923 findCmdTerm (token);
925 deleteToken (name);
928 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent)
930 boolean is_class = FALSE;
931 boolean read_next_token = TRUE;
932 vString * saveScope = vStringNew ();
933 tokenInfo *const parent = newToken ();
935 /* backup the parent token to allow calls like parseBlock(token, token) */
936 copyToken (parent, orig_parent);
938 token->nestLevel++;
940 * Make this routine a bit more forgiving.
941 * If called on an open_curly advance it
943 if ( isType (token, TOKEN_OPEN_CURLY) &&
944 isKeyword(token, KEYWORD_NONE) )
945 readToken(token);
947 if (! isType (token, TOKEN_CLOSE_CURLY))
950 * Read until we find the closing brace,
951 * any nested braces will be handled within
955 read_next_token = TRUE;
956 if (isKeyword (token, KEYWORD_this))
959 * Means we are inside a class and have found
960 * a class, not a function
962 is_class = TRUE;
963 vStringCopy(saveScope, token->scope);
964 addToScope (token, parent->string);
967 * Ignore the remainder of the line
968 * findCmdTerm(token);
970 read_next_token = parseLine (token, is_class);
972 vStringCopy(token->scope, saveScope);
974 else if (isKeyword (token, KEYWORD_var))
977 * Potentially we have found an inner function.
978 * Set something to indicate the scope
980 vStringCopy(saveScope, token->scope);
981 addToScope (token, parent->string);
982 read_next_token = parseLine (token, is_class);
983 vStringCopy(token->scope, saveScope);
985 else if (isKeyword (token, KEYWORD_function))
987 vStringCopy(saveScope, token->scope);
988 addToScope (token, parent->string);
989 parseFunction (token);
990 vStringCopy(token->scope, saveScope);
992 else if (isType (token, TOKEN_OPEN_CURLY))
994 /* Handle nested blocks */
995 parseBlock (token, parent);
997 else
1000 * It is possible for a line to have no terminator
1001 * if the following line is a closing brace.
1002 * parseLine will detect this case and indicate
1003 * whether we should read an additional token.
1005 read_next_token = parseLine (token, is_class);
1009 * Always read a new token unless we find a statement without
1010 * a ending terminator
1012 if( read_next_token )
1013 readToken(token);
1016 * If we find a statement without a terminator consider the
1017 * block finished, otherwise the stack will be off by one.
1019 } while (! isType (token, TOKEN_CLOSE_CURLY) && read_next_token );
1022 deleteToken (parent);
1023 vStringDelete(saveScope);
1024 token->nestLevel--;
1026 return is_class;
1029 static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
1031 tokenInfo *const name = newToken ();
1032 boolean has_methods = FALSE;
1035 * This deals with these formats
1036 * validProperty : 2,
1037 * validMethod : function(a,b) {}
1038 * 'validMethod2' : function(a,b) {}
1039 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1044 readToken (token);
1045 if (isType (token, TOKEN_CLOSE_CURLY))
1048 * This was most likely a variable declaration of a hash table.
1049 * indicate there were no methods and return.
1051 has_methods = FALSE;
1052 goto cleanUp;
1055 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
1057 copyToken(name, token);
1059 readToken (token);
1060 if ( isType (token, TOKEN_COLON) )
1062 readToken (token);
1063 if ( isKeyword (token, KEYWORD_function) )
1065 readToken (token);
1066 if ( isType (token, TOKEN_OPEN_PAREN) )
1068 skipArgumentList(token);
1071 if (isType (token, TOKEN_OPEN_CURLY))
1073 has_methods = TRUE;
1074 addToScope (name, class->string);
1075 makeJsTag (name, JSTAG_METHOD);
1076 parseBlock (token, name);
1079 * Read to the closing curly, check next
1080 * token, if a comma, we must loop again
1082 readToken (token);
1085 else
1087 vString * saveScope = vStringNew ();
1088 boolean has_child_methods = FALSE;
1090 /* skip whatever is the value */
1091 while (! isType (token, TOKEN_COMMA) &&
1092 ! isType (token, TOKEN_CLOSE_CURLY))
1094 if (isType (token, TOKEN_OPEN_CURLY))
1096 vStringCopy (saveScope, token->scope);
1097 addToScope (token, class->string);
1098 has_child_methods = parseMethods (token, name);
1099 vStringCopy (token->scope, saveScope);
1100 readToken (token);
1102 else if (isType (token, TOKEN_OPEN_PAREN))
1104 skipArgumentList (token);
1106 else if (isType (token, TOKEN_OPEN_SQUARE))
1108 skipArrayList (token);
1110 else
1112 readToken (token);
1115 vStringDelete (saveScope);
1117 has_methods = TRUE;
1118 addToScope (name, class->string);
1119 if (has_child_methods)
1120 makeJsTag (name, JSTAG_CLASS);
1121 else
1122 makeJsTag (name, JSTAG_PROPERTY);
1126 } while ( isType(token, TOKEN_COMMA) );
1128 findCmdTerm (token);
1130 cleanUp:
1131 deleteToken (name);
1133 return has_methods;
1136 static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
1138 tokenInfo *const name = newToken ();
1139 tokenInfo *const secondary_name = newToken ();
1140 tokenInfo *const method_body_token = newToken ();
1141 vString * saveScope = vStringNew ();
1142 boolean is_class = FALSE;
1143 boolean is_var = FALSE;
1144 boolean is_terminated = TRUE;
1145 boolean is_global = FALSE;
1146 boolean has_methods = FALSE;
1147 vString * fulltag;
1149 vStringClear(saveScope);
1151 * Functions can be named or unnamed.
1152 * This deals with these formats:
1153 * Function
1154 * validFunctionOne = function(a,b) {}
1155 * testlib.validFunctionFive = function(a,b) {}
1156 * var innerThree = function(a,b) {}
1157 * var innerFour = (a,b) {}
1158 * var D2 = secondary_fcn_name(a,b) {}
1159 * var D3 = new Function("a", "b", "return a+b;");
1160 * Class
1161 * testlib.extras.ValidClassOne = function(a,b) {
1162 * this.a = a;
1164 * Class Methods
1165 * testlib.extras.ValidClassOne.prototype = {
1166 * 'validMethodOne' : function(a,b) {},
1167 * 'validMethodTwo' : function(a,b) {}
1169 * ValidClassTwo = function ()
1171 * this.validMethodThree = function() {}
1172 * // unnamed method
1173 * this.validMethodFour = () {}
1175 * Database.prototype.validMethodThree = Database_getTodaysDate;
1178 if ( is_inside_class )
1179 is_class = TRUE;
1181 * var can preceed an inner function
1183 if ( isKeyword(token, KEYWORD_var) )
1186 * Only create variables for global scope
1188 if ( token->nestLevel == 0 )
1190 is_global = TRUE;
1192 readToken(token);
1195 if ( isKeyword(token, KEYWORD_this) )
1197 readToken(token);
1198 if (isType (token, TOKEN_PERIOD))
1200 readToken(token);
1204 copyToken(name, token);
1206 while (! isType (token, TOKEN_CLOSE_CURLY) &&
1207 ! isType (token, TOKEN_SEMICOLON) &&
1208 ! isType (token, TOKEN_EQUAL_SIGN) )
1210 if (isType (token, TOKEN_OPEN_CURLY))
1211 parseBlock (token, token);
1213 /* Potentially the name of the function */
1214 readToken (token);
1215 if (isType (token, TOKEN_PERIOD))
1218 * Cannot be a global variable is it has dot references in the name
1220 is_global = FALSE;
1223 readToken (token);
1224 if ( isKeyword(token, KEYWORD_NONE) )
1226 if ( is_class )
1228 addToScope(token, name->string);
1230 else
1231 addContext (name, token);
1233 else if ( isKeyword(token, KEYWORD_prototype) )
1236 * When we reach the "prototype" tag, we infer:
1237 * "BindAgent" is a class
1238 * "build" is a method
1240 * function BindAgent( repeatableIdName, newParentIdName ) {
1243 * CASE 1
1244 * Specified function name: "build"
1245 * BindAgent.prototype.build = function( mode ) {
1246 * maybe parse nested functions
1249 * CASE 2
1250 * Prototype listing
1251 * ValidClassOne.prototype = {
1252 * 'validMethodOne' : function(a,b) {},
1253 * 'validMethodTwo' : function(a,b) {}
1257 makeClassTag (name);
1258 is_class = TRUE;
1261 * There should a ".function_name" next.
1263 readToken (token);
1264 if (isType (token, TOKEN_PERIOD))
1267 * Handle CASE 1
1269 readToken (token);
1270 if ( isKeyword(token, KEYWORD_NONE) )
1272 vStringCopy(saveScope, token->scope);
1273 addToScope(token, name->string);
1274 makeJsTag (token, JSTAG_METHOD);
1276 readToken (method_body_token);
1277 vStringCopy (method_body_token->scope, token->scope);
1279 while (! ( isType (method_body_token, TOKEN_SEMICOLON) ||
1280 isType (method_body_token, TOKEN_CLOSE_CURLY) ||
1281 isType (method_body_token, TOKEN_OPEN_CURLY)) )
1283 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1284 skipArgumentList(method_body_token);
1285 else
1286 readToken (method_body_token);
1289 if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1291 parseBlock (method_body_token, token);
1292 is_terminated = TRUE;
1294 else
1295 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
1296 goto cleanUp;
1299 else if (isType (token, TOKEN_EQUAL_SIGN))
1301 readToken (token);
1302 if (isType (token, TOKEN_OPEN_CURLY))
1305 * Handle CASE 2
1307 * Creates tags for each of these class methods
1308 * ValidClassOne.prototype = {
1309 * 'validMethodOne' : function(a,b) {},
1310 * 'validMethodTwo' : function(a,b) {}
1313 parseMethods(token, name);
1315 * Find to the end of the statement
1317 findCmdTerm (token);
1318 token->ignoreTag = FALSE;
1319 is_terminated = TRUE;
1320 goto cleanUp;
1324 readToken (token);
1325 } while (isType (token, TOKEN_PERIOD));
1328 if ( isType (token, TOKEN_OPEN_PAREN) )
1329 skipArgumentList(token);
1331 if ( isType (token, TOKEN_OPEN_SQUARE) )
1332 skipArrayList(token);
1335 if ( isType (token, TOKEN_OPEN_CURLY) )
1337 is_class = parseBlock (token, name);
1342 if ( isType (token, TOKEN_CLOSE_CURLY) )
1345 * Reaching this section without having
1346 * processed an open curly brace indicates
1347 * the statement is most likely not terminated.
1349 is_terminated = FALSE;
1350 goto cleanUp;
1353 if ( isType (token, TOKEN_SEMICOLON) )
1356 * Only create variables for global scope
1358 if ( token->nestLevel == 0 && is_global )
1361 * Handles this syntax:
1362 * var g_var2;
1364 if (isType (token, TOKEN_SEMICOLON))
1365 makeJsTag (name, JSTAG_VARIABLE);
1368 * Statement has ended.
1369 * This deals with calls to functions, like:
1370 * alert(..);
1372 goto cleanUp;
1375 if ( isType (token, TOKEN_EQUAL_SIGN) )
1377 int parenDepth = 0;
1379 readToken (token);
1381 /* rvalue might be surrounded with parentheses */
1382 while (isType (token, TOKEN_OPEN_PAREN))
1384 parenDepth++;
1385 readToken (token);
1388 if ( isKeyword (token, KEYWORD_function) )
1390 readToken (token);
1392 if ( isKeyword (token, KEYWORD_NONE) &&
1393 ! isType (token, TOKEN_OPEN_PAREN) )
1396 * Functions of this format:
1397 * var D2A = function theAdd(a, b)
1399 * return a+b;
1401 * Are really two separate defined functions and
1402 * can be referenced in two ways:
1403 * alert( D2A(1,2) ); // produces 3
1404 * alert( theAdd(1,2) ); // also produces 3
1405 * So it must have two tags:
1406 * D2A
1407 * theAdd
1408 * Save the reference to the name for later use, once
1409 * we have established this is a valid function we will
1410 * create the secondary reference to it.
1412 copyToken(secondary_name, token);
1413 readToken (token);
1416 if ( isType (token, TOKEN_OPEN_PAREN) )
1417 skipArgumentList(token);
1419 if (isType (token, TOKEN_OPEN_CURLY))
1422 * This will be either a function or a class.
1423 * We can only determine this by checking the body
1424 * of the function. If we find a "this." we know
1425 * it is a class, otherwise it is a function.
1427 if ( is_inside_class )
1429 makeJsTag (name, JSTAG_METHOD);
1430 if ( vStringLength(secondary_name->string) > 0 )
1431 makeFunctionTag (secondary_name);
1432 parseBlock (token, name);
1434 else
1436 is_class = parseBlock (token, name);
1437 if ( is_class )
1438 makeClassTag (name);
1439 else
1440 makeFunctionTag (name);
1442 if ( vStringLength(secondary_name->string) > 0 )
1443 makeFunctionTag (secondary_name);
1447 else if (isType (token, TOKEN_OPEN_CURLY))
1450 * Creates tags for each of these class methods
1451 * ValidClassOne.prototype = {
1452 * 'validMethodOne' : function(a,b) {},
1453 * 'validMethodTwo' : function(a,b) {}
1455 * Or checks if this is a hash variable.
1456 * var z = {};
1458 has_methods = parseMethods(token, name);
1459 if (has_methods)
1460 makeJsTag (name, JSTAG_CLASS);
1461 else
1464 * Only create variables for global scope
1466 if ( token->nestLevel == 0 && is_global )
1469 * A pointer can be created to the function.
1470 * If we recognize the function/class name ignore the variable.
1471 * This format looks identical to a variable definition.
1472 * A variable defined outside of a block is considered
1473 * a global variable:
1474 * var g_var1 = 1;
1475 * var g_var2;
1476 * This is not a global variable:
1477 * var g_var = function;
1478 * This is a global variable:
1479 * var g_var = different_var_name;
1481 fulltag = vStringNew ();
1482 if (vStringLength (token->scope) > 0)
1484 vStringCopy(fulltag, token->scope);
1485 vStringCatS (fulltag, ".");
1486 vStringCatS (fulltag, vStringValue(token->string));
1488 else
1490 vStringCopy(fulltag, token->string);
1492 vStringTerminate(fulltag);
1493 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1494 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1496 makeJsTag (name, JSTAG_VARIABLE);
1498 vStringDelete (fulltag);
1501 if (isType (token, TOKEN_CLOSE_CURLY))
1504 * Assume the closing parantheses terminates
1505 * this statements.
1507 is_terminated = TRUE;
1510 else if (isKeyword (token, KEYWORD_new))
1512 readToken (token);
1513 is_var = isType (token, TOKEN_IDENTIFIER);
1514 if ( isKeyword (token, KEYWORD_function) ||
1515 isKeyword (token, KEYWORD_capital_function) ||
1516 isKeyword (token, KEYWORD_capital_object) ||
1517 is_var )
1519 if ( isKeyword (token, KEYWORD_capital_object) )
1520 is_class = TRUE;
1522 readToken (token);
1523 if ( isType (token, TOKEN_OPEN_PAREN) )
1524 skipArgumentList(token);
1526 if (isType (token, TOKEN_SEMICOLON))
1528 if ( token->nestLevel == 0 )
1530 if ( is_var )
1532 makeJsTag (name, JSTAG_VARIABLE);
1534 else
1536 if ( is_class )
1538 makeClassTag (name);
1539 } else {
1540 makeFunctionTag (name);
1545 else if (isType (token, TOKEN_CLOSE_CURLY))
1546 is_terminated = FALSE;
1549 else if (isKeyword (token, KEYWORD_NONE))
1552 * Only create variables for global scope
1554 if ( token->nestLevel == 0 && is_global )
1557 * A pointer can be created to the function.
1558 * If we recognize the function/class name ignore the variable.
1559 * This format looks identical to a variable definition.
1560 * A variable defined outside of a block is considered
1561 * a global variable:
1562 * var g_var1 = 1;
1563 * var g_var2;
1564 * This is not a global variable:
1565 * var g_var = function;
1566 * This is a global variable:
1567 * var g_var = different_var_name;
1569 fulltag = vStringNew ();
1570 if (vStringLength (token->scope) > 0)
1572 vStringCopy(fulltag, token->scope);
1573 vStringCatS (fulltag, ".");
1574 vStringCatS (fulltag, vStringValue(token->string));
1576 else
1578 vStringCopy(fulltag, token->string);
1580 vStringTerminate(fulltag);
1581 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1582 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1584 makeJsTag (name, JSTAG_VARIABLE);
1586 vStringDelete (fulltag);
1590 if (parenDepth > 0)
1592 while (parenDepth > 0)
1594 if (isType (token, TOKEN_OPEN_PAREN))
1595 parenDepth++;
1596 else if (isType (token, TOKEN_CLOSE_PAREN))
1597 parenDepth--;
1598 readToken (token);
1600 if (isType (token, TOKEN_CLOSE_CURLY))
1601 is_terminated = FALSE;
1605 /* if we aren't already at the cmd end, advance to it and check whether
1606 * the statement was terminated */
1607 if (! isType (token, TOKEN_CLOSE_CURLY) &&
1608 ! isType (token, TOKEN_SEMICOLON))
1610 findCmdTerm (token);
1613 * Statements can be optionally terminated in the case of
1614 * statement prior to a close curly brace as in the
1615 * document.write line below:
1617 * function checkForUpdate() {
1618 * if( 1==1 ) {
1619 * document.write("hello from checkForUpdate<br>")
1621 * return 1;
1624 if (isType (token, TOKEN_CLOSE_CURLY))
1625 is_terminated = FALSE;
1628 cleanUp:
1629 vStringCopy(token->scope, saveScope);
1630 deleteToken (name);
1631 deleteToken (secondary_name);
1632 deleteToken (method_body_token);
1633 vStringDelete(saveScope);
1635 return is_terminated;
1638 static void parseUI5 (tokenInfo *const token)
1640 tokenInfo *const name = newToken ();
1642 * SAPUI5 is built on top of jQuery.
1643 * It follows a standard format:
1644 * sap.ui.controller("id.of.controller", {
1645 * method_name : function... {
1646 * },
1648 * method_name : function ... {
1652 * Handle the parsing of the initial controller (and the
1653 * same for "view") and then allow the methods to be
1654 * parsed as usual.
1657 readToken (token);
1659 if (isType (token, TOKEN_PERIOD))
1661 readToken (token);
1662 while (! isType (token, TOKEN_OPEN_PAREN) )
1664 readToken (token);
1666 readToken (token);
1668 if (isType (token, TOKEN_STRING))
1670 copyToken(name, token);
1671 readToken (token);
1674 if (isType (token, TOKEN_COMMA))
1675 readToken (token);
1679 parseMethods (token, name);
1680 } while (! isType (token, TOKEN_CLOSE_CURLY) );
1683 deleteToken (name);
1686 static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
1688 boolean is_terminated = TRUE;
1690 * Detect the common statements, if, while, for, do, ...
1691 * This is necessary since the last statement within a block "{}"
1692 * can be optionally terminated.
1694 * If the statement is not terminated, we need to tell
1695 * the calling routine to prevent reading an additional token
1696 * looking for the end of the statement.
1699 if (isType(token, TOKEN_KEYWORD))
1701 switch (token->keyword)
1703 case KEYWORD_for:
1704 case KEYWORD_while:
1705 case KEYWORD_do:
1706 is_terminated = parseLoop (token);
1707 break;
1708 case KEYWORD_if:
1709 case KEYWORD_else:
1710 case KEYWORD_try:
1711 case KEYWORD_catch:
1712 case KEYWORD_finally:
1713 /* Common semantics */
1714 is_terminated = parseIf (token);
1715 break;
1716 case KEYWORD_switch:
1717 parseSwitch (token);
1718 break;
1719 case KEYWORD_return:
1720 findCmdTerm (token);
1721 is_terminated = isType (token, TOKEN_SEMICOLON);
1722 break;
1723 default:
1724 is_terminated = parseStatement (token, is_inside_class);
1725 break;
1728 else
1731 * Special case where single line statements may not be
1732 * SEMICOLON terminated. parseBlock needs to know this
1733 * so that it does not read the next token.
1735 is_terminated = parseStatement (token, is_inside_class);
1737 return is_terminated;
1740 static void parseJsFile (tokenInfo *const token)
1744 readToken (token);
1746 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
1747 parseFunction (token);
1748 else if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
1749 parseUI5 (token);
1750 else
1751 parseLine (token, FALSE);
1752 } while (TRUE);
1755 static void initialize (const langType language)
1757 Assert (sizeof (JsKinds) / sizeof (JsKinds [0]) == JSTAG_COUNT);
1758 Lang_js = language;
1759 buildJsKeywordHash ();
1762 static void findJsTags (void)
1764 tokenInfo *const token = newToken ();
1765 exception_t exception;
1767 ClassNames = stringListNew ();
1768 FunctionNames = stringListNew ();
1769 LastTokenType = TOKEN_UNDEFINED;
1771 exception = (exception_t) (setjmp (Exception));
1772 while (exception == ExceptionNone)
1773 parseJsFile (token);
1775 stringListDelete (ClassNames);
1776 stringListDelete (FunctionNames);
1777 ClassNames = NULL;
1778 FunctionNames = NULL;
1779 deleteToken (token);
1782 /* Create parser definition stucture */
1783 extern parserDefinition* JavaScriptParser (void)
1785 static const char *const extensions [] = { "js", NULL };
1786 parserDefinition *const def = parserNew ("JavaScript");
1787 def->extensions = extensions;
1789 * New definitions for parsing instead of regex
1791 def->kinds = JsKinds;
1792 def->kindCount = KIND_COUNT (JsKinds);
1793 def->parser = findJsTags;
1794 def->initialize = initialize;
1796 return def;
1798 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */