Replace utils_make_human_readable_str() with g_format_size()
[geany-mirror.git] / ctags / parsers / julia.c
blob3f433e6a15a2f79dccd3576636da40e57cc7f9b9
1 /*
2 * Copyright (c) 2020-2021, getzze <getzze@gmail.com>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for Julia files.
9 * Documented 'kinds':
10 * https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
11 * Language parser in Scheme:
12 * https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
16 * INCLUDE FILES
18 #include "general.h" /* must always come first */
20 #include <string.h>
22 #include "keyword.h"
23 #include "parse.h"
24 #include "entry.h"
25 #include "options.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29 #include "xtag.h"
32 * MACROS
34 #define MAX_STRING_LENGTH 256
37 * DATA DEFINITIONS
/* Tag kinds produced by this parser.  The first eight values line up, in
 * order, with the entries of JuliaKinds[]. */
typedef enum {
	K_CONSTANT,	/* 'c' */
	K_FUNCTION,	/* 'f' */
	K_FIELD,	/* 'g' */
	K_MACRO,	/* 'm' */
	K_MODULE,	/* 'n' */
	K_STRUCT,	/* 's' */
	K_TYPE,		/* 't' */
	K_UNKNOWN,	/* 'x' */
	K_NONE		/* no kind: addTag()/addReferenceTag() emit nothing, and it
			 * also stands for "no parent scope" */
} JuliaKind;
/* Roles of a module reference tag; indices into JuliaModuleRoles[]. */
typedef enum {
	JULIA_MODULE_IMPORTED,	/* import X */
	JULIA_MODULE_USED,	/* using X */
	JULIA_MODULE_NAMESPACE,	/* only some symbols of the module are loaded */
} juliaModuleRole;
/* Roles of a symbol loaded from another module; indices into
 * JuliaUnknownRoles[]. */
typedef enum {
	JULIA_UNKNOWN_IMPORTED,	/* loaded by "import" */
	JULIA_UNKNOWN_USED,	/* loaded by "using" */
} juliaUnknownRole;
63 * using X X = (kind:module, role:used)
65 * using X: a, b X = (kind:module, role:namespace)
66 * a, b = (kind:unknown, role:used, scope:module:X)
68 * import X X = (kind:module, role:imported)
70 * import X.a, Y.b X, Y = (kind:module, role:namespace)
71 * a, b = (kind:unknown, role:imported, scope:module:X)
73 * import X: a, b Same as the above one
75 static roleDefinition JuliaModuleRoles [] = {
76 { true, "imported", "loaded by \"import\"" },
77 { true, "used", "loaded by \"using\"" },
78 { true, "namespace", "only some symbols in it are imported" },
81 static roleDefinition JuliaUnknownRoles [] = {
82 { true, "imported", "loaded by \"import\"" },
83 { true, "used", "loaded by \"using\""},
86 static kindDefinition JuliaKinds [] = {
87 { true, 'c', "constant", "Constants" },
88 { true, 'f', "function", "Functions" },
89 { true, 'g', "field", "Fields" },
90 { true, 'm', "macro", "Macros" },
91 { true, 'n', "module", "Modules",
92 ATTACH_ROLES(JuliaModuleRoles) },
93 { true, 's', "struct", "Structures" },
94 { true, 't', "type", "Types" },
95 { true, 'x', "unknown", "name defined in other modules",
96 .referenceOnly = true, ATTACH_ROLES(JuliaUnknownRoles) },
/* Token types stored in lexerState::cur_token.
 *
 * advanceToken() also stores raw character codes there (for instance '=' or
 * '\''), so these values share one int space with characters.
 * NOTE(review): the enumerators occupy 0..27, which overlaps control
 * characters such as '\r' (13 == TOKEN_IDENTIFIER) -- confirm that such a
 * character can never reach the catch-all branch of advanceToken(). */
typedef enum {
	TOKEN_NONE=0,		/* none */
	TOKEN_WHITESPACE,
	TOKEN_PAREN_BLOCK,	/* ( ... ) */
	TOKEN_BRACKET_BLOCK,	/* [ ... ] */
	TOKEN_CURLY_BLOCK,	/* { ... } */
	TOKEN_OPEN_BLOCK,	/* keyword opening a block closed by "end" */
	TOKEN_CLOSE_BLOCK,	/* "end" */
	TOKEN_TYPE_ANNOTATION,	/* ::T, <:T, >:T */
	TOKEN_TYPE_WHERE,	/* "where" clause */
	TOKEN_CONST,
	TOKEN_STRING,		/* = 10 */
	TOKEN_COMMAND,		/* `...` */
	TOKEN_MACROCALL,	/* @name */
	TOKEN_IDENTIFIER,
	TOKEN_MODULE,
	TOKEN_MACRO,
	TOKEN_FUNCTION,
	TOKEN_STRUCT,
	TOKEN_ENUM,
	TOKEN_TYPE,
	TOKEN_IMPORT,		/* = 20 */
	TOKEN_USING,
	TOKEN_EXPORT,
	TOKEN_NEWLINE,
	TOKEN_SEMICOLON,
	TOKEN_COMPOSER_KWD,	/* KEYWORD only */
	TOKEN_EOF,
	TOKEN_COUNT
} tokenType;
130 static const keywordTable JuliaKeywordTable [] = {
131 /* TODO: Sort by keys. */
132 { "mutable", TOKEN_COMPOSER_KWD },
133 { "primitive", TOKEN_COMPOSER_KWD },
134 { "abstract", TOKEN_COMPOSER_KWD },
136 { "if", TOKEN_OPEN_BLOCK },
137 { "for", TOKEN_OPEN_BLOCK },
138 { "while", TOKEN_OPEN_BLOCK },
139 { "try", TOKEN_OPEN_BLOCK },
140 { "do", TOKEN_OPEN_BLOCK },
141 { "begin", TOKEN_OPEN_BLOCK },
142 { "let", TOKEN_OPEN_BLOCK },
143 { "quote", TOKEN_OPEN_BLOCK },
145 { "module", TOKEN_MODULE },
146 { "baremodule",TOKEN_MODULE },
148 { "using", TOKEN_USING },
149 { "import", TOKEN_IMPORT },
151 { "export", TOKEN_EXPORT },
152 { "const", TOKEN_CONST },
153 { "macro", TOKEN_MACRO },
154 { "function", TOKEN_FUNCTION },
155 { "struct", TOKEN_STRUCT },
156 { "type", TOKEN_TYPE },
157 { "where", TOKEN_TYPE_WHERE },
158 { "end", TOKEN_CLOSE_BLOCK },
161 typedef struct {
162 /* Characters */
163 int prev_c;
164 int cur_c;
165 int next_c;
167 /* Tokens */
168 bool first_token;
169 int cur_token;
170 vString* token_str;
171 unsigned long line;
172 MIOPos pos;
173 } lexerState;
176 * FUNCTION PROTOTYPES
179 static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope);
181 static void scanParenBlock (lexerState *lexer);
184 * FUNCTION DEFINITIONS
/* Returns non-zero when the string `what` ends with the suffix `withwhat`,
 * zero otherwise.  An empty suffix matches every string.
 * Both arguments must be valid NUL-terminated strings. */
static int endswith(const char* what, const char* withwhat)
{
	/* strlen() returns size_t; keeping that type avoids a narrowing
	 * conversion to int for very long strings */
	size_t l1 = strlen(what);
	size_t l2 = strlen(withwhat);
	if (l2 > l1)
	{
		return 0;
	}

	return strcmp(withwhat, what + (l1 - l2)) == 0;
}
199 /* Resets the scope string to the old length */
200 static void resetScope (vString *scope, size_t old_len)
202 vStringTruncate (scope, old_len);
205 /* Adds a name to the end of the scope string */
206 static void addToScope (vString *scope, vString *name)
208 if (vStringLength(scope) > 0)
210 vStringPut(scope, '.');
212 vStringCat(scope, name);
215 /* Reads a character from the file */
216 static void advanceChar (lexerState *lexer)
218 lexer->prev_c = lexer->cur_c;
219 lexer->cur_c = lexer->next_c;
220 lexer->next_c = getcFromInputFile();
223 /* Reads N characters from the file */
224 static void advanceNChar (lexerState *lexer, int n)
226 while (n--)
228 advanceChar(lexer);
232 /* Store the current character in lexerState::token_str if there is space
233 * (set by MAX_STRING_LENGTH), and then read the next character from the file */
234 static void advanceAndStoreChar (lexerState *lexer)
236 if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
238 vStringPut(lexer->token_str, (char) lexer->cur_c);
240 advanceChar(lexer);
/* Tells whether `c` is a blank (space or tab).  When `newline` is true,
 * carriage return and line feed count as whitespace as well. */
static bool isWhitespace (int c, bool newline)
{
	switch (c)
	{
		case ' ':
		case '\t':
			return true;
		case '\r':
		case '\n':
			return newline;
		default:
			return false;
	}
}
/* True for character codes in the 7-bit ASCII range 0x00..0x7F. */
static bool isAscii (int c)
{
	return !(c < 0 || c > 0x7F);
}
/* True when `c` is one of the single-character operators after which a
 * statement may continue on the next line (see advanceToken()). */
static bool isOperator (int c)
{
	switch (c)
	{
		case '%': case '^': case '&': case '|':
		case '*': case '-': case '+': case '~':
		case '<': case '>': case ',': case '/':
		case '?': case '=': case ':':
			return true;
		default:
			return false;
	}
}
/* True when `c` may start an identifier: an ASCII letter, '_', or any value
 * from 0xC0 upwards.
 * This does not distinguish Unicode letters from operators... */
static bool isIdentifierFirstCharacter (int c)
{
	if (c >= 0xC0)
	{
		return true;
	}
	return (bool) (isAscii(c) && (isalpha (c) || c == '_'));
}
/* True when `c` may appear inside an identifier: anything accepted by
 * isIdentifierFirstCharacter(), an ASCII digit, '!', or any value from 0x80
 * upwards.
 * This does not distinguish Unicode letters from operators... */
static bool isIdentifierCharacter (int c)
{
	if (c >= 0x80)
	{
		return true;
	}
	return (bool) (isIdentifierFirstCharacter(c) ||
				   (isAscii(c) && (isdigit(c) || c == '!')));
}
281 static void skipWhitespace (lexerState *lexer, bool newline)
283 while (isWhitespace(lexer->cur_c, newline))
285 advanceChar(lexer);
289 /* The transpose operator is only allowed after an identifier, a number, an expression inside parenthesis or an index */
290 static bool isTranspose (int c)
292 return (isIdentifierCharacter(c) || c == ')' || c == ']');
297 * Lexer functions
298 * */
300 /* Check that the current character sequence is a type declaration or inheritance */
301 static bool isTypeDecl (lexerState *lexer)
303 if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') ||
304 (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') ||
305 (lexer->cur_c == ':' && lexer->next_c == ':') )
307 return true;
309 return false;
312 /* Check if the current char is a new line */
313 static bool isNewLine (lexerState *lexer)
315 return (lexer->cur_c == '\n')? true: false;
318 /* Check if the current char is a new line.
319 * If it is, skip the newline and return true */
320 static bool skipNewLine (lexerState *lexer)
322 if (isNewLine(lexer))
324 advanceChar(lexer);
325 return true;
327 return false;
330 /* Skip a single comment or multiline comment
331 * A single line comment starts with #
332 * A multi-line comment is encapsulated in #=...=# and they are nesting
333 * */
334 static void skipComment (lexerState *lexer)
336 /* # */
337 if (lexer->next_c != '=')
339 advanceNChar(lexer, 1);
340 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
342 advanceChar(lexer);
345 /* block comment */
346 else /* if (lexer->next_c == '=') */
348 int level = 1;
349 advanceNChar(lexer, 2);
350 while (lexer->cur_c != EOF && level > 0)
352 if (lexer->cur_c == '=' && lexer->next_c == '#')
354 level--;
355 advanceNChar(lexer, 2);
357 else if (lexer->cur_c == '#' && lexer->next_c == '=')
359 level++;
360 advanceNChar(lexer, 2);
362 else
364 advanceChar(lexer);
370 static void scanIdentifier (lexerState *lexer, bool clear)
372 if (clear)
374 vStringClear(lexer->token_str);
379 advanceAndStoreChar(lexer);
380 } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
383 /* Scan a quote-like expression.
384 * Allow for triple-character variand and interpolation with `$`.
385 * These last past the end of the line, so be careful
386 * not to store too much of them (see MAX_STRING_LENGTH). */
387 static void scanStringOrCommand (lexerState *lexer, int c)
389 bool istriple = false;
391 /* Pass the first "quote"-character */
392 advanceAndStoreChar(lexer);
394 /* Check for triple "quote"-character */
395 if (lexer->cur_c == c && lexer->next_c == c)
397 istriple = true;
398 advanceAndStoreChar(lexer);
399 advanceAndStoreChar(lexer);
401 /* Cancel up to 2 "quote"-characters after opening the triple */
402 if (lexer->cur_c == c)
404 advanceAndStoreChar(lexer);
405 if (lexer->cur_c == c)
407 advanceAndStoreChar(lexer);
412 while (lexer->cur_c != EOF && lexer->cur_c != c)
414 /* Check for interpolation before checking for end of "quote" */
415 if (lexer->cur_c == '$' && lexer->next_c == '(')
417 advanceAndStoreChar(lexer);
418 scanParenBlock(lexer);
419 /* continue to avoid advance character again. Correct bug
420 * with "quote"-character just after closing parenthesis */
421 continue;
424 if (lexer->cur_c == '\\' &&
425 (lexer->next_c == c || lexer->next_c == '\\'))
427 advanceAndStoreChar(lexer);
429 advanceAndStoreChar(lexer);
431 /* Cancel up to 2 "quote"-characters if triple string */
432 if (istriple && lexer->cur_c == c)
434 advanceAndStoreChar(lexer);
435 if (lexer->cur_c == c)
437 advanceAndStoreChar(lexer);
441 /* Pass the last "quote"-character */
442 advanceAndStoreChar(lexer);
446 /* Scan commands surrounded by backticks,
447 * possibly triple backticks */
448 static void scanCommand (lexerState *lexer)
450 scanStringOrCommand(lexer, '`');
453 /* Double-quoted strings,
454 * possibly triple doublequotes */
455 static void scanString (lexerState *lexer)
457 scanStringOrCommand(lexer, '"');
461 /* This deals with character literals: 'n', '\n', '\uFFFF';
462 * and matrix transpose: A'.
463 * We'll use this approximate regexp for the literals:
464 * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
465 * Either way, we'll treat this token as a string, so it gets preserved */
466 static bool scanCharacterOrTranspose (lexerState *lexer)
468 if (isTranspose(lexer->prev_c))
470 /* deal with untranspose/transpose sequence */
471 while (lexer->cur_c != EOF && lexer->cur_c == '\'')
473 advanceAndStoreChar(lexer);
475 return false;
478 //vStringClear(lexer->token_str);
479 advanceAndStoreChar(lexer);
481 if (lexer->cur_c == '\\')
483 advanceAndStoreChar(lexer);
484 /* The \' \\ \' \' (literally '\'') case */
485 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
487 advanceAndStoreChar(lexer);
488 advanceAndStoreChar(lexer);
490 /* The \' \\ [^']+ \' case */
491 else
493 while (lexer->cur_c != EOF && lexer->cur_c != '\'')
495 advanceAndStoreChar(lexer);
499 /* The \' [^'] \' and \' \' \' cases */
500 else if (lexer->next_c == '\'')
502 advanceAndStoreChar(lexer);
503 advanceAndStoreChar(lexer);
505 /* Otherwise it is malformed */
506 return true;
509 /* Parse a block with opening and closing character */
510 static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline)
512 /* Assume the current char is `open` */
513 int level = 1;
515 /* Pass the first opening */
516 advanceAndStoreChar(lexer);
518 while (lexer->cur_c != EOF && level > 0)
520 /* Parse everything */
521 if (lexer->cur_c == ' ' || lexer->cur_c == '\t')
523 skipWhitespace(lexer, false);
524 vStringPut(lexer->token_str, ' ');
526 if (lexer->cur_c == '#')
528 skipComment(lexer);
530 else if (lexer->cur_c == '\"')
532 scanString(lexer);
534 else if (lexer->cur_c == '\'')
536 scanCharacterOrTranspose(lexer);
539 /* Parse opening/closing */
540 if (lexer->cur_c == open)
542 level++;
544 else if (lexer->cur_c == close)
546 level--;
549 if (convert_newline && skipNewLine(lexer))
551 vStringPut(lexer->token_str, ' ');
553 else
555 advanceAndStoreChar(lexer);
559 /* Lexer position is just after `close` */
563 /* Parse a block inside parenthesis, for example a function argument list */
564 static void scanParenBlock (lexerState *lexer)
566 scanBlock(lexer, '(', ')', true);
569 /* Indexing block with bracket.
570 * Some keywords have a special meaning in this environment:
571 * end, begin, for and if */
572 static void scanIndexBlock (lexerState *lexer)
574 scanBlock(lexer, '[', ']', false);
578 /* Parse a block inside curly brackets, for type parametrization */
579 static void scanCurlyBlock (lexerState *lexer)
581 scanBlock(lexer, '{', '}', true);
584 /* Scan type annotation like
585 * `::Type`, `::Type{T}`
587 static void scanTypeAnnotation (lexerState *lexer)
589 /* assume that current char is '<', '>' or ':', followed by ':' */
590 advanceAndStoreChar(lexer);
591 advanceAndStoreChar(lexer);
593 skipWhitespace(lexer, true);
594 scanIdentifier(lexer, false);
595 if (lexer->cur_c == '{')
597 scanCurlyBlock(lexer);
601 /* Scan type annotation like
602 * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
604 static void scanTypeWhere (lexerState *lexer)
606 /* assume that current token is 'where'
607 * allow line continuation */
608 vStringPut(lexer->token_str, ' ');
609 skipWhitespace(lexer, true);
611 while (lexer->cur_c != EOF)
614 if (lexer->cur_c == '{')
616 scanCurlyBlock(lexer);
618 else if (isIdentifierFirstCharacter(lexer->cur_c))
620 scanIdentifier(lexer, false);
621 if (endswith(vStringValue(lexer->token_str), "where"))
623 /* allow line continuation */
624 vStringPut(lexer->token_str, ' ');
625 skipWhitespace(lexer, true);
628 else if (isTypeDecl(lexer))
630 scanTypeAnnotation(lexer);
631 //skipWhitespace(lexer, false);
633 else if (lexer->cur_c == '#')
635 skipComment(lexer);
636 /* allow line continuation */
637 if (endswith(vStringValue(lexer->token_str), "where "))
639 skipWhitespace(lexer, true);
642 else if (isWhitespace(lexer->cur_c, false))
644 while (isWhitespace(lexer->cur_c, false))
646 advanceChar(lexer);
648 /* Add a space, if it is not a trailing space */
649 if (!(isNewLine(lexer)))
651 vStringPut(lexer->token_str, ' ');
654 else
656 break;
662 static int parseIdentifier (lexerState *lexer)
664 langType julia = getInputLanguage ();
665 scanIdentifier(lexer, true);
667 int k = lookupKeyword (vStringValue(lexer->token_str), julia);
668 /* First part of a composed identifier */
669 if (k == TOKEN_COMPOSER_KWD)
671 skipWhitespace(lexer, false);
672 scanIdentifier(lexer, true);
673 k = lookupKeyword (vStringValue(lexer->token_str), julia);
676 if ((k == TOKEN_OPEN_BLOCK)
677 || (k == TOKEN_MODULE)
678 || (k == TOKEN_IMPORT)
679 || (k == TOKEN_USING)
680 || (k == TOKEN_EXPORT)
681 || (k == TOKEN_CONST)
682 || (k == TOKEN_MACRO)
683 || (k == TOKEN_FUNCTION)
684 || (k == TOKEN_STRUCT)
685 || (k == TOKEN_TYPE)
686 || (k == TOKEN_TYPE_WHERE)
687 || (k == TOKEN_CLOSE_BLOCK))
689 if (k == TOKEN_TYPE_WHERE)
691 scanTypeWhere(lexer);
693 return lexer->cur_token = k;
695 return lexer->cur_token = TOKEN_IDENTIFIER;
699 /* Advances the parser one token, optionally skipping whitespace
700 * (otherwise it is concatenated and returned as a single whitespace token).
701 * Whitespace is needed to properly render function signatures. Unrecognized
702 * token starts are stored literally, e.g. token may equal to a character '#'. */
703 static int advanceToken (lexerState *lexer, bool skip_whitespace, bool propagate_first)
705 bool have_whitespace = false;
706 bool newline = false;
707 lexer->line = getInputLineNumber();
708 lexer->pos = getInputFilePosition();
710 /* the next token is the first token of the line */
711 if (!propagate_first)
713 if (lexer->cur_token == TOKEN_NEWLINE ||
714 lexer->cur_token == TOKEN_SEMICOLON ||
715 lexer->cur_token == TOKEN_NONE ||
716 (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL))
718 lexer->first_token = true;
720 else
722 lexer->first_token = false;
726 while (lexer->cur_c != EOF)
728 /* skip whitespaces but not newlines */
729 if (isWhitespace(lexer->cur_c, newline))
731 skipWhitespace(lexer, newline);
732 have_whitespace = true;
734 else if (lexer->cur_c == '#')
736 skipComment(lexer);
737 have_whitespace = true;
739 else
741 if (have_whitespace && !skip_whitespace)
743 return lexer->cur_token = TOKEN_WHITESPACE;
745 break;
748 lexer->line = getInputLineNumber();
749 lexer->pos = getInputFilePosition();
750 while (lexer->cur_c != EOF)
752 if (lexer->cur_c == '"')
754 vStringClear(lexer->token_str);
755 scanString(lexer);
756 return lexer->cur_token = TOKEN_STRING;
758 else if (lexer->cur_c == '\'')
760 vStringClear(lexer->token_str);
761 if (scanCharacterOrTranspose(lexer))
763 return lexer->cur_token = TOKEN_STRING;
765 else
767 return lexer->cur_token = '\'';
770 else if (lexer->cur_c == '`')
772 vStringClear(lexer->token_str);
773 scanCommand(lexer);
774 return lexer->cur_token = TOKEN_COMMAND;
776 else if (isIdentifierFirstCharacter(lexer->cur_c))
778 return parseIdentifier(lexer);
780 else if (lexer->cur_c == '@')
782 vStringClear(lexer->token_str);
783 advanceAndStoreChar(lexer);
786 advanceAndStoreChar(lexer);
787 } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c));
788 return lexer->cur_token = TOKEN_MACROCALL;
790 else if (lexer->cur_c == '(')
792 vStringClear(lexer->token_str);
793 scanParenBlock(lexer);
794 return lexer->cur_token = TOKEN_PAREN_BLOCK;
796 else if (lexer->cur_c == '[')
798 vStringClear(lexer->token_str);
799 scanIndexBlock(lexer);
800 return lexer->cur_token = TOKEN_BRACKET_BLOCK;
802 else if (lexer->cur_c == '{')
804 vStringClear(lexer->token_str);
805 scanCurlyBlock(lexer);
806 return lexer->cur_token = TOKEN_CURLY_BLOCK;
808 else if (isTypeDecl(lexer))
810 vStringClear(lexer->token_str);
811 scanTypeAnnotation(lexer);
812 return lexer->cur_token = TOKEN_TYPE_ANNOTATION;
814 else if (skipNewLine(lexer))
816 /* allow line continuation */
817 if (isOperator(lexer->cur_token))
819 return lexer->cur_token;
821 return lexer->cur_token = TOKEN_NEWLINE;
823 else if (lexer->cur_c == ';')
825 advanceChar(lexer);
826 return lexer->cur_token = TOKEN_SEMICOLON;
828 else
830 int c = lexer->cur_c;
831 advanceChar(lexer);
832 return lexer->cur_token = c;
835 return lexer->cur_token = TOKEN_EOF;
838 static void initLexer (lexerState *lexer)
840 advanceNChar(lexer, 2);
841 lexer->token_str = vStringNew();
842 lexer->first_token = true;
843 lexer->cur_token = TOKEN_NONE;
844 lexer->prev_c = '\0';
846 if (lexer->cur_c == '#' && lexer->next_c == '!')
848 skipComment(lexer);
850 advanceToken(lexer, true, false);
853 static void deInitLexer (lexerState *lexer)
855 vStringDelete(lexer->token_str);
856 lexer->token_str = NULL;
#if 0
/* Debugging aid (compiled out): prints the line number, the current token,
 * the current character and the token text. */
static void debugLexer (lexerState *lexer)
{
	/* lexerState::line is unsigned long (%lu), cur_token is int (%d) */
	printf("Current lexer state: line %lu, token (%d), cur char `%c`, token str:\n\t`", lexer->line, lexer->cur_token, lexer->cur_c);
	/* never use the token text as a format string: it may contain '%' */
	printf("%s", vStringValue(lexer->token_str));
	printf("`\n");
}
#endif
868 static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
870 if (kind == K_NONE)
872 return;
874 tagEntryInfo tag;
875 initTagEntry(&tag, vStringValue(ident), kind);
877 tag.lineNumber = line;
878 tag.filePosition = pos;
879 tag.sourceFileName = getInputFileName();
881 tag.extensionFields.signature = arg_list;
882 /* tag.extensionFields.varType = type; */ /* Needs a workaround */
883 if (parent_kind != K_NONE)
885 tag.extensionFields.scopeKindIndex = parent_kind;
886 tag.extensionFields.scopeName = vStringValue(scope);
888 makeTagEntry(&tag);
891 static void addReferenceTag (vString* ident, int kind, int role, unsigned long line, MIOPos pos, vString* scope, int parent_kind)
893 if (kind == K_NONE)
895 return;
897 tagEntryInfo tag;
898 initRefTagEntry(&tag, vStringValue(ident), kind, role);
899 tag.lineNumber = line;
900 tag.filePosition = pos;
901 if (parent_kind != K_NONE)
903 tag.extensionFields.scopeKindIndex = parent_kind;
904 tag.extensionFields.scopeName = vStringValue(scope);
906 makeTagEntry(&tag);
909 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
910 * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
911 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
913 int block_level = 0;
915 while (lexer->cur_token != TOKEN_EOF)
917 /* check if the keyword is reached, only if outside a block */
918 if (block_level == 0)
920 int ii = 0;
921 for(ii = 0; ii < num_goal_tokens; ii++)
923 if (lexer->cur_token == goal_tokens[ii])
925 break;
928 if (ii < num_goal_tokens)
930 /* parse the next token */
931 advanceToken(lexer, true, false);
932 break;
936 /* take into account nested blocks */
937 switch (lexer->cur_token)
939 case TOKEN_OPEN_BLOCK:
940 block_level++;
941 break;
942 case TOKEN_CLOSE_BLOCK:
943 block_level--;
944 break;
945 default:
946 break;
949 /* Has to be after the token switch to catch the case when we start with the initial level token */
950 if (num_goal_tokens == 0 && block_level == 0)
952 break;
955 advanceToken(lexer, true, false);
959 /* Skip until the end of the block */
960 static void skipUntilEnd (lexerState *lexer)
962 int goal_tokens[] = { TOKEN_CLOSE_BLOCK };
964 skipUntil(lexer, goal_tokens, 1);
967 /* Skip a function body after assignment operator '='
968 * Beware of continuation lines after operators
969 * */
970 static void skipBody (lexerState *lexer)
972 /* assume position just after '=' */
973 while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE)
975 advanceToken(lexer, true, false);
977 if (lexer->cur_token == TOKEN_OPEN_BLOCK)
979 /* pass the keyword */
980 advanceToken(lexer, true, false);
981 skipUntilEnd(lexer);
982 /* the next token is already selected */
987 /* Short function format:
988 * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
989 * */
990 static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind)
992 /* assume the current char is just after identifier */
993 vString *name;
994 vString *arg_list;
995 unsigned long line;
996 MIOPos pos;
998 /* should be an open parenthesis after identifier
999 * with potentially parametric type */
1000 skipWhitespace(lexer, false);
1001 if (lexer->cur_c == '{')
1003 scanCurlyBlock(lexer);
1004 skipWhitespace(lexer, false);
1007 if (lexer->cur_c != '(')
1009 advanceToken(lexer, true, false);
1010 return;
1013 name = vStringNewCopy(lexer->token_str);
1014 line = lexer->line;
1015 pos = lexer->pos;
1017 /* scan argument list */
1018 advanceToken(lexer, true, false);
1019 arg_list = vStringNewCopy(lexer->token_str);
1021 /* scan potential type casting */
1022 advanceToken(lexer, true, false);
1023 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1025 vStringCat(arg_list, lexer->token_str);
1026 advanceToken(lexer, true, false);
1028 /* scan potential type union with 'where' */
1029 if (lexer->cur_token == TOKEN_TYPE_WHERE)
1031 vStringPut(arg_list, ' ');
1032 vStringCat(arg_list, lexer->token_str);
1033 advanceToken(lexer, true, false);
1036 /* scan equal sign, ignore `==` and `=>` */
1037 if (!(lexer->cur_token == '=' &&
1038 lexer->cur_c != '=' &&
1039 lexer->cur_c != '>'))
1041 vStringDelete(name);
1042 vStringDelete(arg_list);
1043 return;
1046 addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind);
1048 /* scan until end of function definition */
1049 skipBody(lexer);
1051 /* Should end on a new line, parse next token */
1052 advanceToken(lexer, true, false);
1053 lexer->first_token = true;
1055 vStringDelete(name);
1056 vStringDelete(arg_list);
1059 /* Function format:
1060 * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
1061 * */
1062 static void parseFunction (lexerState *lexer, vString *scope, int parent_kind)
1064 vString *name;
1065 vString *arg_list;
1066 vString *local_scope;
1067 int local_parent_kind;
1068 unsigned long line;
1069 MIOPos pos;
1071 advanceToken(lexer, true, false);
1072 if (lexer->cur_token != TOKEN_IDENTIFIER)
1074 return;
1076 else if (lexer->cur_c == '.')
1078 local_scope = vStringNewCopy(lexer->token_str);
1079 local_parent_kind = K_MODULE;
1080 advanceChar(lexer);
1081 advanceToken(lexer, true, false);
1083 else
1085 local_scope = vStringNewCopy(scope);
1086 local_parent_kind = parent_kind;
1089 /* Scan for parametric type constructor */
1090 skipWhitespace(lexer, false);
1091 if (lexer->cur_c == '{')
1093 scanCurlyBlock(lexer);
1094 skipWhitespace(lexer, false);
1097 name = vStringNewCopy(lexer->token_str);
1098 arg_list = vStringNew();
1099 line = lexer->line;
1100 pos = lexer->pos;
1102 advanceToken(lexer, true, false);
1103 if (lexer->cur_token == TOKEN_PAREN_BLOCK)
1105 vStringCopy(arg_list, lexer->token_str);
1107 /* scan potential type casting */
1108 advanceToken(lexer, true, false);
1109 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1111 vStringCat(arg_list, lexer->token_str);
1112 advanceToken(lexer, true, false);
1114 /* scan potential type union with 'where' */
1115 if (lexer->cur_token == TOKEN_TYPE_WHERE)
1117 vStringPut(arg_list, ' ');
1118 vStringCat(arg_list, lexer->token_str);
1119 advanceToken(lexer, true, false);
1122 addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, local_scope, local_parent_kind);
1123 addToScope(scope, name);
1124 parseExpr(lexer, true, K_FUNCTION, scope);
1126 else if (lexer->cur_token == TOKEN_CLOSE_BLOCK)
1128 /* Function without method */
1129 addTag(name, NULL, NULL, K_FUNCTION, line, pos, local_scope, local_parent_kind);
1130 /* Go to the closing 'end' keyword */
1131 skipUntilEnd(lexer);
1134 vStringDelete(name);
1135 vStringDelete(arg_list);
1136 vStringDelete(local_scope);
1139 /* Macro format:
1140 * "macro" <ident>()
1142 static void parseMacro (lexerState *lexer, vString *scope, int parent_kind)
1144 vString *name;
1145 unsigned long line;
1146 MIOPos pos;
1148 advanceToken(lexer, true, false);
1149 if (lexer->cur_token != TOKEN_IDENTIFIER)
1151 return;
1154 name = vStringNewCopy(lexer->token_str);
1155 line = lexer->line;
1156 pos = lexer->pos;
1158 advanceToken(lexer, true, false);
1159 if (lexer->cur_token == TOKEN_PAREN_BLOCK)
1161 addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind);
1164 skipUntilEnd(lexer);
1165 vStringDelete(name);
1168 /* Const format:
1169 * "const" <ident>
1171 static void parseConst (lexerState *lexer, vString *scope, int parent_kind)
1173 vString *name;
1175 advanceToken(lexer, true, false);
1176 if (lexer->cur_token != TOKEN_IDENTIFIER)
1178 return;
1181 name = vStringNewCopy(lexer->token_str);
1183 advanceToken(lexer, true, false);
1184 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1186 addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
1187 advanceToken(lexer, true, false);
1189 else
1191 addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind);
1194 vStringDelete(name);
1197 /* Type format:
1198 * [ "abstract" | "primitive" ] "type" <ident>
1200 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
1202 advanceToken(lexer, true, false);
1203 if (lexer->cur_token != TOKEN_IDENTIFIER)
1205 return;
1208 addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
1210 skipUntilEnd(lexer);
1213 /* Module format:
1214 * [ "baremodule" | "module" ] <ident>
1216 static void parseModule (lexerState *lexer, vString *scope, int parent_kind)
1218 advanceToken(lexer, true, false);
1219 if (lexer->cur_token != TOKEN_IDENTIFIER)
1221 return;
1224 addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind);
1225 addToScope(scope, lexer->token_str);
1226 advanceToken(lexer, true, false);
1227 parseExpr(lexer, true, K_MODULE, scope);
1231 * Parse comma separated entity in import/using expressions. An entity could be
1232 * in the form of "Module" or "Module.symbol". The lexer should be at the end
1233 * of "Module", and this function will take it to the end of the entity
1234 * (whitespaces also skipped).
1236 static void parseImportEntity (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1238 if (lexer->cur_c == '.')
1240 if (token_type == TOKEN_IMPORT)
1242 vString *module_name = vStringNewCopy(lexer->token_str);
1243 addReferenceTag(module_name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
1244 advanceChar(lexer);
1245 advanceToken(lexer, true, false);
1246 addReferenceTag(lexer->token_str, K_UNKNOWN, JULIA_UNKNOWN_IMPORTED, lexer->line, lexer->pos, module_name, K_MODULE);
1247 vStringDelete(module_name);
1249 else /* if (token_type == TOKEN_USING) */
1251 /* using Module.symbol is invalid, so we advance the lexer but don't tag it. */
1252 advanceChar(lexer);
1253 advanceToken(lexer, true, false);
1256 else
1258 if (token_type == TOKEN_IMPORT)
1260 addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_IMPORTED, lexer->line, lexer->pos, scope, parent_kind);
1262 else /* if (token_type == TOKEN_USING) */
1264 addReferenceTag(lexer->token_str, K_MODULE, JULIA_MODULE_USED, lexer->line, lexer->pos, scope, parent_kind);
1269 /* Parse import/using expressions with a colon, like: */
1270 /* import Module: symbol1, symbol2 */
1271 /* using Module: symbol1, symbol2 */
1272 /* The lexer should be at the end of "Module", and this function will take it
1273 * to the end of the token after this expression (whitespaces also skipped). */
1274 static void parseColonImportExpr (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1276 int symbol_role;
1277 if (token_type == TOKEN_IMPORT)
1279 symbol_role = JULIA_UNKNOWN_IMPORTED;
1281 else /* if (token_type == TOKEN_USING) */
1283 symbol_role = JULIA_UNKNOWN_USED;
1285 vString *name = vStringNewCopy(lexer->token_str);
1286 addReferenceTag(name, K_MODULE, JULIA_MODULE_NAMESPACE, lexer->line, lexer->pos, scope, parent_kind);
1287 advanceChar(lexer);
1288 advanceToken(lexer, true, false);
1289 if (lexer->cur_token == TOKEN_NEWLINE)
1291 advanceToken(lexer, true, false);
1293 while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
1295 addReferenceTag(lexer->token_str, K_UNKNOWN, symbol_role, lexer->line, lexer->pos, name, K_MODULE);
1296 if (lexer->cur_c == ',')
1298 advanceChar(lexer);
1299 advanceToken(lexer, true, false);
1300 if (lexer->cur_token == TOKEN_NEWLINE)
1302 advanceToken(lexer, true, false);
1305 else
1307 advanceToken(lexer, true, false);
1310 vStringDelete(name);
1313 /* Import format:
1314 * [ "import" | "using" ] <ident> [: <name>]
1316 static void parseImport (lexerState *lexer, vString *scope, int token_type, int parent_kind)
1318 /* capture the imported name */
1319 advanceToken(lexer, true, false);
1320 /* import Mod1: symbol1, symbol2 */
1321 /* using Mod1: symbol1, symbol2 */
1322 if (lexer->cur_c == ':')
1324 parseColonImportExpr(lexer, scope, token_type, parent_kind);
1326 /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
1327 else
1329 while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL)
1331 parseImportEntity(lexer, scope, token_type, parent_kind);
1332 if (lexer->cur_c == ',')
1334 advanceChar(lexer);
1335 advanceToken(lexer, true, false);
1336 if (lexer->cur_token == TOKEN_NEWLINE)
1338 advanceToken(lexer, true, false);
1341 else
1343 advanceToken(lexer, true, false);
1349 /* Structs format:
1350 * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
1351 * */
1352 static void parseStruct (lexerState *lexer, vString *scope, int parent_kind)
1354 vString *name;
1355 vString *field;
1356 size_t old_scope_len;
1357 unsigned long line;
1358 MIOPos pos;
1360 advanceToken(lexer, true, false);
1361 if (lexer->cur_token != TOKEN_IDENTIFIER)
1363 return;
1366 name = vStringNewCopy(lexer->token_str);
1367 field = vStringNew();
1368 line = lexer->line;
1369 pos = lexer->pos;
1371 /* scan parametrization */
1372 advanceToken(lexer, true, false);
1373 if (lexer->cur_token == TOKEN_CURLY_BLOCK)
1375 addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind);
1376 advanceToken(lexer, true, false);
1378 else
1380 addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind);
1382 addToScope(scope, name);
1384 /* skip inheritance */
1385 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1387 advanceToken(lexer, true, false);
1390 /* keep the struct scope in memory to reset it after parsing constructors */
1391 old_scope_len = vStringLength(scope);
1392 /* Parse fields and inner constructors */
1393 while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK)
1395 if (lexer->cur_token == TOKEN_IDENTIFIER && lexer->first_token)
1397 if (strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0)
1399 /* inner constructor */
1400 parseShortFunction(lexer, scope, K_STRUCT);
1401 continue;
1404 vStringCopy(field, lexer->token_str);
1406 /* parse type annotation */
1407 advanceToken(lexer, true, false);
1408 if (lexer->cur_token == TOKEN_TYPE_ANNOTATION)
1410 addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
1411 advanceToken(lexer, true, false);
1413 else
1415 addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT);
1418 else if (lexer->cur_token == TOKEN_FUNCTION)
1420 /* inner constructor */
1421 parseFunction(lexer, scope, K_STRUCT);
1423 else
1425 /* Get next token */
1426 advanceToken(lexer, true, false);
1428 resetScope(scope, old_scope_len);
1431 vStringDelete(name);
1432 vStringDelete(field);
1436 static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope)
1438 int level = 1;
1439 size_t old_scope_len;
1440 vString *local_scope = NULL;
1442 while (lexer->cur_token != TOKEN_EOF)
1444 old_scope_len = vStringLength(scope);
1445 /* Advance token and update if this is a new line */
1446 while (lexer->cur_token == TOKEN_NEWLINE ||
1447 lexer->cur_token == TOKEN_SEMICOLON ||
1448 lexer->cur_token == TOKEN_NONE )
1450 advanceToken(lexer, true, false);
1453 /* Make sure every case advances the token
1454 * otherwise we can be stuck in infinite loop */
1455 switch (lexer->cur_token)
1457 case TOKEN_CONST:
1458 parseConst(lexer, scope, kind);
1459 break;
1460 case TOKEN_FUNCTION:
1461 parseFunction(lexer, scope, kind);
1462 break;
1463 case TOKEN_MACRO:
1464 parseMacro(lexer, scope, kind);
1465 break;
1466 case TOKEN_MODULE:
1467 parseModule(lexer, scope, kind);
1468 break;
1469 case TOKEN_STRUCT:
1470 parseStruct(lexer, scope, kind);
1471 break;
1472 case TOKEN_TYPE:
1473 parseType(lexer, scope, kind);
1474 break;
1475 case TOKEN_IMPORT:
1476 parseImport(lexer, scope, TOKEN_IMPORT, kind);
1477 break;
1478 case TOKEN_USING:
1479 parseImport(lexer, scope, TOKEN_USING, kind);
1480 case TOKEN_IDENTIFIER:
1481 if (lexer->first_token && lexer->cur_c == '.')
1483 if (local_scope == NULL)
1485 local_scope = vStringNew();
1487 vStringCopy(local_scope, lexer->token_str);
1488 advanceChar(lexer);
1489 // next token, but keep the first_token value
1490 advanceToken(lexer, true, true);
1491 skipWhitespace(lexer, false);
1492 if (lexer->cur_c == '(')
1494 parseShortFunction(lexer, local_scope, K_MODULE);
1497 else
1499 skipWhitespace(lexer, false);
1500 if (lexer->first_token && (lexer->cur_c == '(' || lexer->cur_c == '{'))
1502 parseShortFunction(lexer, scope, kind);
1504 else
1506 advanceToken(lexer, true, false);
1509 break;
1510 case TOKEN_OPEN_BLOCK:
1511 level++;
1512 advanceToken(lexer, true, false);
1513 break;
1514 case TOKEN_CLOSE_BLOCK:
1515 level--;
1516 advanceToken(lexer, true, false);
1517 break;
1518 default:
1519 advanceToken(lexer, true, false);
1520 break;
1522 resetScope(scope, old_scope_len);
1523 if (delim && level <= 0)
1525 break;
1528 vStringDelete(local_scope);
1531 static void findJuliaTags (void)
1533 lexerState lexer;
1534 vString* scope = vStringNew();
1535 initLexer(&lexer);
1537 parseExpr(&lexer, false, K_NONE, scope);
1538 vStringDelete(scope);
1540 deInitLexer(&lexer);
1543 extern parserDefinition* JuliaParser (void)
1545 static const char *const extensions [] = { "jl", NULL };
1546 parserDefinition* def = parserNew ("Julia");
1547 def->kindTable = JuliaKinds;
1548 def->kindCount = ARRAY_SIZE (JuliaKinds);
1549 def->extensions = extensions;
1550 def->parser = findJuliaTags;
1551 def->keywordTable = JuliaKeywordTable;
1552 def->keywordCount = ARRAY_SIZE (JuliaKeywordTable);
1553 return def;