2 * Copyright (c) 2020-2021, getzze <getzze@gmail.com>
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for Julia files.
10 * https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide
11 * Language parser in Scheme:
12 * https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
18 #include "general.h" /* must always come first */
34 #define MAX_STRING_LENGTH 256
52 JULIA_MODULE_IMPORTED
,
54 JULIA_MODULE_NAMESPACE
,
58 JULIA_UNKNOWN_IMPORTED
,
63 * using X X = (kind:module, role:used)
65 * using X: a, b X = (kind:module, role:namespace)
66 * a, b = (kind:unknown, role:used, scope:module:X)
68 * import X X = (kind:module, role:imported)
70 * import X.a, Y.b X, Y = (kind:module, role:namespace)
71 * a, b = (kind:unknown, role:imported, scope:module:X)
73 * import X: a, b Same as the above one
75 static roleDefinition JuliaModuleRoles
[] = {
76 { true, "imported", "loaded by \"import\"" },
77 { true, "used", "loaded by \"using\"" },
78 { true, "namespace", "only some symbols in it are imported" },
81 static roleDefinition JuliaUnknownRoles
[] = {
82 { true, "imported", "loaded by \"import\"" },
83 { true, "used", "loaded by \"using\""},
86 static kindDefinition JuliaKinds
[] = {
87 { true, 'c', "constant", "Constants" },
88 { true, 'f', "function", "Functions" },
89 { true, 'g', "field", "Fields" },
90 { true, 'm', "macro", "Macros" },
91 { true, 'n', "module", "Modules",
92 ATTACH_ROLES(JuliaModuleRoles
) },
93 { true, 's', "struct", "Structures" },
94 { true, 't', "type", "Types" },
95 { true, 'x', "unknown", "name defined in other modules",
96 .referenceOnly
= true, ATTACH_ROLES(JuliaUnknownRoles
) },
100 TOKEN_NONE
=0, /* none */
107 TOKEN_TYPE_ANNOTATION
,
110 TOKEN_STRING
, /* = 10 */
120 TOKEN_IMPORT
, /* = 20 */
125 TOKEN_COMPOSER_KWD
, /* KEYWORD only */
130 static const keywordTable JuliaKeywordTable
[] = {
131 /* TODO: Sort by keys. */
132 { "mutable", TOKEN_COMPOSER_KWD
},
133 { "primitive", TOKEN_COMPOSER_KWD
},
134 { "abstract", TOKEN_COMPOSER_KWD
},
136 { "if", TOKEN_OPEN_BLOCK
},
137 { "for", TOKEN_OPEN_BLOCK
},
138 { "while", TOKEN_OPEN_BLOCK
},
139 { "try", TOKEN_OPEN_BLOCK
},
140 { "do", TOKEN_OPEN_BLOCK
},
141 { "begin", TOKEN_OPEN_BLOCK
},
142 { "let", TOKEN_OPEN_BLOCK
},
143 { "quote", TOKEN_OPEN_BLOCK
},
145 { "module", TOKEN_MODULE
},
146 { "baremodule",TOKEN_MODULE
},
148 { "using", TOKEN_USING
},
149 { "import", TOKEN_IMPORT
},
151 { "export", TOKEN_EXPORT
},
152 { "const", TOKEN_CONST
},
153 { "macro", TOKEN_MACRO
},
154 { "function", TOKEN_FUNCTION
},
155 { "struct", TOKEN_STRUCT
},
156 { "type", TOKEN_TYPE
},
157 { "where", TOKEN_TYPE_WHERE
},
158 { "end", TOKEN_CLOSE_BLOCK
},
176 * FUNCTION PROTOTYPES
179 static void parseExpr (lexerState
*lexer
, bool delim
, int kind
, vString
*scope
);
181 static void scanParenBlock (lexerState
*lexer
);
184 * FUNCTION DEFINITIONS
/* Returns 1 when the string `what` ends with the suffix `withwhat`,
 * and 0 otherwise. Both arguments must be NUL-terminated strings.
 * An empty suffix matches every string. */
static int endswith(const char* what, const char* withwhat)
{
	/* strlen() returns size_t; keeping that type avoids truncating or
	 * sign-flipping the length of a very long string. */
	size_t l1 = strlen(what);
	size_t l2 = strlen(withwhat);

	/* A suffix longer than the string can never match, and subtracting
	 * would move the comparison pointer before the start of `what`. */
	if (l2 > l1)
	{
		return 0;
	}

	return strcmp(withwhat, what + (l1 - l2)) == 0;
}
199 /* Resets the scope string to the old length */
200 static void resetScope (vString
*scope
, size_t old_len
)
202 vStringTruncate (scope
, old_len
);
205 /* Adds a name to the end of the scope string */
206 static void addToScope (vString
*scope
, vString
*name
)
208 if (vStringLength(scope
) > 0)
210 vStringPut(scope
, '.');
212 vStringCat(scope
, name
);
215 /* Reads a character from the file */
216 static void advanceChar (lexerState
*lexer
)
218 lexer
->prev_c
= lexer
->cur_c
;
219 lexer
->cur_c
= lexer
->next_c
;
220 lexer
->next_c
= getcFromInputFile();
223 /* Reads N characters from the file */
224 static void advanceNChar (lexerState
*lexer
, int n
)
232 /* Store the current character in lexerState::token_str if there is space
233 * (set by MAX_STRING_LENGTH), and then read the next character from the file */
234 static void advanceAndStoreChar (lexerState
*lexer
)
236 if (vStringLength(lexer
->token_str
) < MAX_STRING_LENGTH
)
238 vStringPut(lexer
->token_str
, (char) lexer
->cur_c
);
/* Tells whether `c` is whitespace. Space and tab always count; carriage
 * return and line feed count only when `newline` is true. */
static bool isWhitespace (int c, bool newline)
{
	switch (c)
	{
		case ' ':
		case '\t':
			return true;
		case '\r':
		case '\n':
			return newline;
		default:
			return false;
	}
}
/* True for values in the 7-bit range 0..0x7F; negative values are
 * rejected. */
static bool isAscii (int c)
{
	return !(c < 0 || c >= 0x80);
}
/* True when `c` is one of the single characters treated as an operator:
 * % ^ & | * - + ~ < > , / ? = : */
static bool isOperator (int c)
{
	switch (c)
	{
		case '%': case '^': case '&': case '|':
		case '*': case '-': case '+': case '~':
		case '<': case '>': case ',': case '/':
		case '?': case '=': case ':':
			return true;
		default:
			return false;
	}
}
/* Tells whether `c` may start an identifier: an ASCII letter, an
 * underscore, or any value from 0xC0 upwards. Unicode letters are not
 * told apart from Unicode operators. */
static bool isIdentifierFirstCharacter (int c)
{
	if (c >= 0xC0)
	{
		return true;
	}

	/* <ctype.h> classification is only consulted for 7-bit values. */
	if (c < 0 || c >= 0x80)
	{
		return false;
	}

	return isalpha (c) != 0 || c == '_';
}
/* Tells whether `c` may appear inside an identifier: anything accepted as
 * a first character, an ASCII digit, '!', or any value from 0x80 upwards.
 * Unicode letters are not told apart from Unicode operators. */
static bool isIdentifierCharacter (int c)
{
	if (isIdentifierFirstCharacter(c) || c >= 0x80)
	{
		return true;
	}

	if (!isAscii(c))
	{
		return false;
	}

	return isdigit(c) != 0 || c == '!';
}
281 static void skipWhitespace (lexerState
*lexer
, bool newline
)
283 while (isWhitespace(lexer
->cur_c
, newline
))
/* `c` is the character preceding a single quote. The quote can only be
 * the transpose operator after an identifier, a number, a parenthesised
 * expression or an index, i.e. after an identifier character, ')' or ']'. */
static bool isTranspose (int c)
{
	if (c == ')' || c == ']')
	{
		return true;
	}

	return isIdentifierCharacter(c);
}
300 /* Check that the current character sequence is a type declaration or inheritance */
301 static bool isTypeDecl (lexerState
*lexer
)
303 if ((lexer
->prev_c
!= '.' && lexer
->cur_c
== '<' && lexer
->next_c
== ':') ||
304 (lexer
->prev_c
!= '.' && lexer
->cur_c
== '>' && lexer
->next_c
== ':') ||
305 (lexer
->cur_c
== ':' && lexer
->next_c
== ':') )
312 /* Check if the current char is a new line */
313 static bool isNewLine (lexerState
*lexer
)
315 return (lexer
->cur_c
== '\n')? true: false;
318 /* Check if the current char is a new line.
319 * If it is, skip the newline and return true */
320 static bool skipNewLine (lexerState
*lexer
)
322 if (isNewLine(lexer
))
330 /* Skip a single comment or multiline comment
331 * A single line comment starts with #
332 * A multi-line comment is encapsulated in #=...=# and they are nesting
/* Skips the comment the lexer is positioned on. The character after the
 * current one picks the form: anything but '=' means a single-line
 * comment, '=' means a block comment. */
334 static void skipComment (lexerState
*lexer
)
/* Single-line form. */
337 if (lexer
->next_c
!= '=')
/* Consume one character, then scan up to the line feed or EOF. */
339 advanceNChar(lexer
, 1);
340 while (lexer
->cur_c
!= EOF
&& lexer
->cur_c
!= '\n')
/* Block form. */
346 else /* if (lexer->next_c == '=') */
/* Consume the two-character opener. */
349 advanceNChar(lexer
, 2);
/* NOTE(review): `level` is declared and updated on lines not present here;
 * presumably it is the comment nesting depth -- confirm. */
350 while (lexer
->cur_c
!= EOF
&& level
> 0)
/* "=#" sequence: both characters are consumed. */
352 if (lexer
->cur_c
== '=' && lexer
->next_c
== '#')
355 advanceNChar(lexer
, 2);
/* "#=" sequence inside the comment: both characters are consumed. */
357 else if (lexer
->cur_c
== '#' && lexer
->next_c
== '=')
360 advanceNChar(lexer
, 2);
370 static void scanIdentifier (lexerState
*lexer
, bool clear
)
374 vStringClear(lexer
->token_str
);
379 advanceAndStoreChar(lexer
);
380 } while(lexer
->cur_c
!= EOF
&& isIdentifierCharacter(lexer
->cur_c
));
383 /* Scan a quote-like expression.
384 * Allow for the triple-character variant and interpolation with `$`.
385 * These can extend past the end of the line, so be careful
386 * not to store too much of them (see MAX_STRING_LENGTH). */
387 static void scanStringOrCommand (lexerState
*lexer
, int c
)
389 bool istriple
= false;
391 /* Pass the first "quote"-character */
392 advanceAndStoreChar(lexer
);
394 /* Check for triple "quote"-character */
395 if (lexer
->cur_c
== c
&& lexer
->next_c
== c
)
398 advanceAndStoreChar(lexer
);
399 advanceAndStoreChar(lexer
);
401 /* Cancel up to 2 "quote"-characters after opening the triple */
402 if (lexer
->cur_c
== c
)
404 advanceAndStoreChar(lexer
);
405 if (lexer
->cur_c
== c
)
407 advanceAndStoreChar(lexer
);
412 while (lexer
->cur_c
!= EOF
&& lexer
->cur_c
!= c
)
414 /* Check for interpolation before checking for end of "quote" */
415 if (lexer
->cur_c
== '$' && lexer
->next_c
== '(')
417 advanceAndStoreChar(lexer
);
418 scanParenBlock(lexer
);
419 /* continue to avoid advance character again. Correct bug
420 * with "quote"-character just after closing parenthesis */
424 if (lexer
->cur_c
== '\\' &&
425 (lexer
->next_c
== c
|| lexer
->next_c
== '\\'))
427 advanceAndStoreChar(lexer
);
429 advanceAndStoreChar(lexer
);
431 /* Cancel up to 2 "quote"-characters if triple string */
432 if (istriple
&& lexer
->cur_c
== c
)
434 advanceAndStoreChar(lexer
);
435 if (lexer
->cur_c
== c
)
437 advanceAndStoreChar(lexer
);
441 /* Pass the last "quote"-character */
442 advanceAndStoreChar(lexer
);
446 /* Scan commands surrounded by backticks,
447 * possibly triple backticks */
448 static void scanCommand (lexerState
*lexer
)
450 scanStringOrCommand(lexer
, '`');
453 /* Double-quoted strings,
454 * possibly triple doublequotes */
455 static void scanString (lexerState
*lexer
)
457 scanStringOrCommand(lexer
, '"');
461 /* This deals with character literals: 'n', '\n', '\uFFFF';
462 * and matrix transpose: A'.
463 * We'll use this approximate regexp for the literals:
464 * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \'
465 * Either way, we'll treat this token as a string, so it gets preserved */
466 static bool scanCharacterOrTranspose (lexerState
*lexer
)
468 if (isTranspose(lexer
->prev_c
))
470 /* deal with untranspose/transpose sequence */
471 while (lexer
->cur_c
!= EOF
&& lexer
->cur_c
== '\'')
473 advanceAndStoreChar(lexer
);
478 //vStringClear(lexer->token_str);
479 advanceAndStoreChar(lexer
);
481 if (lexer
->cur_c
== '\\')
483 advanceAndStoreChar(lexer
);
484 /* The \' \\ \' \' (literally '\'') case */
485 if (lexer
->cur_c
== '\'' && lexer
->next_c
== '\'')
487 advanceAndStoreChar(lexer
);
488 advanceAndStoreChar(lexer
);
490 /* The \' \\ [^']+ \' case */
493 while (lexer
->cur_c
!= EOF
&& lexer
->cur_c
!= '\'')
495 advanceAndStoreChar(lexer
);
499 /* The \' [^'] \' and \' \' \' cases */
500 else if (lexer
->next_c
== '\'')
502 advanceAndStoreChar(lexer
);
503 advanceAndStoreChar(lexer
);
505 /* Otherwise it is malformed */
509 /* Parse a block with opening and closing character */
510 static void scanBlock (lexerState
*lexer
, int open
, int close
, bool convert_newline
)
512 /* Assume the current char is `open` */
515 /* Pass the first opening */
516 advanceAndStoreChar(lexer
);
518 while (lexer
->cur_c
!= EOF
&& level
> 0)
520 /* Parse everything */
521 if (lexer
->cur_c
== ' ' || lexer
->cur_c
== '\t')
523 skipWhitespace(lexer
, false);
524 vStringPut(lexer
->token_str
, ' ');
526 if (lexer
->cur_c
== '#')
530 else if (lexer
->cur_c
== '\"')
534 else if (lexer
->cur_c
== '\'')
536 scanCharacterOrTranspose(lexer
);
539 /* Parse opening/closing */
540 if (lexer
->cur_c
== open
)
544 else if (lexer
->cur_c
== close
)
549 if (convert_newline
&& skipNewLine(lexer
))
551 vStringPut(lexer
->token_str
, ' ');
555 advanceAndStoreChar(lexer
);
559 /* Lexer position is just after `close` */
563 /* Parse a block inside parenthesis, for example a function argument list */
564 static void scanParenBlock (lexerState
*lexer
)
566 scanBlock(lexer
, '(', ')', true);
569 /* Indexing block with bracket.
570 * Some keywords have a special meaning in this environment:
571 * end, begin, for and if */
572 static void scanIndexBlock (lexerState
*lexer
)
574 scanBlock(lexer
, '[', ']', false);
578 /* Parse a block inside curly brackets, for type parametrization */
579 static void scanCurlyBlock (lexerState
*lexer
)
581 scanBlock(lexer
, '{', '}', true);
584 /* Scan type annotation like
585 * `::Type`, `::Type{T}`
587 static void scanTypeAnnotation (lexerState
*lexer
)
589 /* assume that current char is '<', '>' or ':', followed by ':' */
590 advanceAndStoreChar(lexer
);
591 advanceAndStoreChar(lexer
);
593 skipWhitespace(lexer
, true);
594 scanIdentifier(lexer
, false);
595 if (lexer
->cur_c
== '{')
597 scanCurlyBlock(lexer
);
601 /* Scan type annotation like
602 * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}`
604 static void scanTypeWhere (lexerState
*lexer
)
606 /* assume that current token is 'where'
607 * allow line continuation */
608 vStringPut(lexer
->token_str
, ' ');
609 skipWhitespace(lexer
, true);
611 while (lexer
->cur_c
!= EOF
)
614 if (lexer
->cur_c
== '{')
616 scanCurlyBlock(lexer
);
618 else if (isIdentifierFirstCharacter(lexer
->cur_c
))
620 scanIdentifier(lexer
, false);
621 if (endswith(vStringValue(lexer
->token_str
), "where"))
623 /* allow line continuation */
624 vStringPut(lexer
->token_str
, ' ');
625 skipWhitespace(lexer
, true);
628 else if (isTypeDecl(lexer
))
630 scanTypeAnnotation(lexer
);
631 //skipWhitespace(lexer, false);
633 else if (lexer
->cur_c
== '#')
636 /* allow line continuation */
637 if (endswith(vStringValue(lexer
->token_str
), "where "))
639 skipWhitespace(lexer
, true);
642 else if (isWhitespace(lexer
->cur_c
, false))
644 while (isWhitespace(lexer
->cur_c
, false))
648 /* Add a space, if it is not a trailing space */
649 if (!(isNewLine(lexer
)))
651 vStringPut(lexer
->token_str
, ' ');
662 static int parseIdentifier (lexerState
*lexer
)
664 langType julia
= getInputLanguage ();
665 scanIdentifier(lexer
, true);
667 int k
= lookupKeyword (vStringValue(lexer
->token_str
), julia
);
668 /* First part of a composed identifier */
669 if (k
== TOKEN_COMPOSER_KWD
)
671 skipWhitespace(lexer
, false);
672 scanIdentifier(lexer
, true);
673 k
= lookupKeyword (vStringValue(lexer
->token_str
), julia
);
676 if ((k
== TOKEN_OPEN_BLOCK
)
677 || (k
== TOKEN_MODULE
)
678 || (k
== TOKEN_IMPORT
)
679 || (k
== TOKEN_USING
)
680 || (k
== TOKEN_EXPORT
)
681 || (k
== TOKEN_CONST
)
682 || (k
== TOKEN_MACRO
)
683 || (k
== TOKEN_FUNCTION
)
684 || (k
== TOKEN_STRUCT
)
686 || (k
== TOKEN_TYPE_WHERE
)
687 || (k
== TOKEN_CLOSE_BLOCK
))
689 if (k
== TOKEN_TYPE_WHERE
)
691 scanTypeWhere(lexer
);
693 return lexer
->cur_token
= k
;
695 return lexer
->cur_token
= TOKEN_IDENTIFIER
;
699 /* Advances the parser one token, optionally skipping whitespace
700 * (otherwise it is concatenated and returned as a single whitespace token).
701 * Whitespace is needed to properly render function signatures. Unrecognized
702 * token starts are stored literally, e.g. the token may be equal to the character '#'. */
703 static int advanceToken (lexerState
*lexer
, bool skip_whitespace
, bool propagate_first
)
705 bool have_whitespace
= false;
706 bool newline
= false;
707 lexer
->line
= getInputLineNumber();
708 lexer
->pos
= getInputFilePosition();
710 /* the next token is the first token of the line */
711 if (!propagate_first
)
713 if (lexer
->cur_token
== TOKEN_NEWLINE
||
714 lexer
->cur_token
== TOKEN_SEMICOLON
||
715 lexer
->cur_token
== TOKEN_NONE
||
716 (lexer
->first_token
&& lexer
->cur_token
== TOKEN_MACROCALL
))
718 lexer
->first_token
= true;
722 lexer
->first_token
= false;
726 while (lexer
->cur_c
!= EOF
)
728 /* skip whitespaces but not newlines */
729 if (isWhitespace(lexer
->cur_c
, newline
))
731 skipWhitespace(lexer
, newline
);
732 have_whitespace
= true;
734 else if (lexer
->cur_c
== '#')
737 have_whitespace
= true;
741 if (have_whitespace
&& !skip_whitespace
)
743 return lexer
->cur_token
= TOKEN_WHITESPACE
;
748 lexer
->line
= getInputLineNumber();
749 lexer
->pos
= getInputFilePosition();
750 while (lexer
->cur_c
!= EOF
)
752 if (lexer
->cur_c
== '"')
754 vStringClear(lexer
->token_str
);
756 return lexer
->cur_token
= TOKEN_STRING
;
758 else if (lexer
->cur_c
== '\'')
760 vStringClear(lexer
->token_str
);
761 if (scanCharacterOrTranspose(lexer
))
763 return lexer
->cur_token
= TOKEN_STRING
;
767 return lexer
->cur_token
= '\'';
770 else if (lexer
->cur_c
== '`')
772 vStringClear(lexer
->token_str
);
774 return lexer
->cur_token
= TOKEN_COMMAND
;
776 else if (isIdentifierFirstCharacter(lexer
->cur_c
))
778 return parseIdentifier(lexer
);
780 else if (lexer
->cur_c
== '@')
782 vStringClear(lexer
->token_str
);
783 advanceAndStoreChar(lexer
);
786 advanceAndStoreChar(lexer
);
787 } while(lexer
->cur_c
!= EOF
&& isIdentifierCharacter(lexer
->cur_c
));
788 return lexer
->cur_token
= TOKEN_MACROCALL
;
790 else if (lexer
->cur_c
== '(')
792 vStringClear(lexer
->token_str
);
793 scanParenBlock(lexer
);
794 return lexer
->cur_token
= TOKEN_PAREN_BLOCK
;
796 else if (lexer
->cur_c
== '[')
798 vStringClear(lexer
->token_str
);
799 scanIndexBlock(lexer
);
800 return lexer
->cur_token
= TOKEN_BRACKET_BLOCK
;
802 else if (lexer
->cur_c
== '{')
804 vStringClear(lexer
->token_str
);
805 scanCurlyBlock(lexer
);
806 return lexer
->cur_token
= TOKEN_CURLY_BLOCK
;
808 else if (isTypeDecl(lexer
))
810 vStringClear(lexer
->token_str
);
811 scanTypeAnnotation(lexer
);
812 return lexer
->cur_token
= TOKEN_TYPE_ANNOTATION
;
814 else if (skipNewLine(lexer
))
816 /* allow line continuation */
817 if (isOperator(lexer
->cur_token
))
819 return lexer
->cur_token
;
821 return lexer
->cur_token
= TOKEN_NEWLINE
;
823 else if (lexer
->cur_c
== ';')
826 return lexer
->cur_token
= TOKEN_SEMICOLON
;
830 int c
= lexer
->cur_c
;
832 return lexer
->cur_token
= c
;
835 return lexer
->cur_token
= TOKEN_EOF
;
/* Prepares `lexer` for use: reads ahead in the input, allocates the token
 * buffer, resets the token state and fetches the first token. */
838 static void initLexer (lexerState
*lexer
)
/* Advance by two characters. */
840 advanceNChar(lexer
, 2);
/* Fresh, empty token buffer; deInitLexer() deletes it. */
841 lexer
->token_str
= vStringNew();
842 lexer
->first_token
= true;
843 lexer
->cur_token
= TOKEN_NONE
;
844 lexer
->prev_c
= '\0';
/* NOTE(review): the statement guarded by this "#!" test is not present in
 * this view; presumably it skips a shebang line -- confirm. */
846 if (lexer
->cur_c
== '#' && lexer
->next_c
== '!')
/* Fetch the first token, skipping whitespace. */
850 advanceToken(lexer
, true, false);
853 static void deInitLexer (lexerState
*lexer
)
855 vStringDelete(lexer
->token_str
);
856 lexer
->token_str
= NULL
;
860 static void debugLexer (lexerState
*lexer
)
862 printf("Current lexer state: line %d, token (%lu), cur char `%c`, token str:\n\t`", lexer
->line
, lexer
->cur_token
, lexer
->cur_c
);
863 printf(vStringValue(lexer
->token_str
));
/* Fills in a tag entry for the definition named `ident`.
 *   ident       - name of the tag
 *   type        - not used by the visible code (its only use is commented out)
 *   arg_list    - stored as the tag's signature; callers also pass NULL
 *   kind        - kind given to initTagEntry()
 *   line, pos   - location recorded in the tag
 *   scope       - scope name, used only when parent_kind is not K_NONE
 *   parent_kind - kind of the enclosing scope, or K_NONE for no scope
 * NOTE(review): the declaration of `tag` and whatever follows the field
 * assignments are on lines not present in this view. */
868 static void addTag (vString
* ident
, const char* type
, const char* arg_list
, int kind
, unsigned long line
, MIOPos pos
, vString
*scope
, int parent_kind
)
875 initTagEntry(&tag
, vStringValue(ident
), kind
);
/* Location fields are set from the caller-supplied line and position. */
877 tag
.lineNumber
= line
;
878 tag
.filePosition
= pos
;
879 tag
.sourceFileName
= getInputFileName();
881 tag
.extensionFields
.signature
= arg_list
;
882 /* tag.extensionFields.varType = type; */ /* Needs a workaround */
/* Scope information is attached only for nested definitions. */
883 if (parent_kind
!= K_NONE
)
885 tag
.extensionFields
.scopeKindIndex
= parent_kind
;
886 tag
.extensionFields
.scopeName
= vStringValue(scope
);
/* Fills in a reference tag entry for `ident` with the given kind and role.
 *   ident       - name of the referenced symbol
 *   kind, role  - passed to initRefTagEntry()
 *   line, pos   - location recorded in the tag
 *   scope       - scope name, used only when parent_kind is not K_NONE
 *   parent_kind - kind of the enclosing scope, or K_NONE for no scope
 * NOTE(review): the declaration of `tag` and whatever follows the field
 * assignments are on lines not present in this view. */
891 static void addReferenceTag (vString
* ident
, int kind
, int role
, unsigned long line
, MIOPos pos
, vString
* scope
, int parent_kind
)
898 initRefTagEntry(&tag
, vStringValue(ident
), kind
, role
);
899 tag
.lineNumber
= line
;
900 tag
.filePosition
= pos
;
/* Scope information is attached only when a parent kind is given. */
901 if (parent_kind
!= K_NONE
)
903 tag
.extensionFields
.scopeKindIndex
= parent_kind
;
904 tag
.extensionFields
.scopeName
= vStringValue(scope
);
909 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
910 * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
911 static void skipUntil (lexerState
*lexer
, int goal_tokens
[], int num_goal_tokens
)
915 while (lexer
->cur_token
!= TOKEN_EOF
)
917 /* check if the keyword is reached, only if outside a block */
918 if (block_level
== 0)
921 for(ii
= 0; ii
< num_goal_tokens
; ii
++)
923 if (lexer
->cur_token
== goal_tokens
[ii
])
928 if (ii
< num_goal_tokens
)
930 /* parse the next token */
931 advanceToken(lexer
, true, false);
936 /* take into account nested blocks */
937 switch (lexer
->cur_token
)
939 case TOKEN_OPEN_BLOCK
:
942 case TOKEN_CLOSE_BLOCK
:
949 /* Has to be after the token switch to catch the case when we start with the initial level token */
950 if (num_goal_tokens
== 0 && block_level
== 0)
955 advanceToken(lexer
, true, false);
959 /* Skip until the end of the block */
960 static void skipUntilEnd (lexerState
*lexer
)
962 int goal_tokens
[] = { TOKEN_CLOSE_BLOCK
};
964 skipUntil(lexer
, goal_tokens
, 1);
967 /* Skip a function body after assignment operator '='
968 * Beware of continuation lines after operators
/* Skips the body of a short-form definition. Tokens are consumed until a
 * TOKEN_NEWLINE or TOKEN_EOF becomes the current token.
 * NOTE(review): some lines of this function are not present in this view,
 * including whatever follows the second advanceToken() call. */
970 static void skipBody (lexerState
*lexer
)
972 /* assume position just after '=' */
973 while (lexer
->cur_token
!= TOKEN_EOF
&& lexer
->cur_token
!= TOKEN_NEWLINE
)
/* Fetch the next token, skipping whitespace. */
975 advanceToken(lexer
, true, false);
/* A block-opening keyword inside the body gets special handling. */
977 if (lexer
->cur_token
== TOKEN_OPEN_BLOCK
)
979 /* pass the keyword */
980 advanceToken(lexer
, true, false);
982 /* the next token is already selected */
987 /* Short function format:
988 * <ident> ( [<args>] ) [::<type>] [<where>] = [begin] <body> [end]
990 static void parseShortFunction (lexerState
*lexer
, vString
*scope
, int parent_kind
)
992 /* assume the current char is just after identifier */
998 /* should be an open parenthesis after identifier
999 * with potentially parametric type */
1000 skipWhitespace(lexer
, false);
1001 if (lexer
->cur_c
== '{')
1003 scanCurlyBlock(lexer
);
1004 skipWhitespace(lexer
, false);
1007 if (lexer
->cur_c
!= '(')
1009 advanceToken(lexer
, true, false);
1013 name
= vStringNewCopy(lexer
->token_str
);
1017 /* scan argument list */
1018 advanceToken(lexer
, true, false);
1019 arg_list
= vStringNewCopy(lexer
->token_str
);
1021 /* scan potential type casting */
1022 advanceToken(lexer
, true, false);
1023 if (lexer
->cur_token
== TOKEN_TYPE_ANNOTATION
)
1025 vStringCat(arg_list
, lexer
->token_str
);
1026 advanceToken(lexer
, true, false);
1028 /* scan potential type union with 'where' */
1029 if (lexer
->cur_token
== TOKEN_TYPE_WHERE
)
1031 vStringPut(arg_list
, ' ');
1032 vStringCat(arg_list
, lexer
->token_str
);
1033 advanceToken(lexer
, true, false);
1036 /* scan equal sign, ignore `==` and `=>` */
1037 if (!(lexer
->cur_token
== '=' &&
1038 lexer
->cur_c
!= '=' &&
1039 lexer
->cur_c
!= '>'))
1041 vStringDelete(name
);
1042 vStringDelete(arg_list
);
1046 addTag(name
, NULL
, vStringValue(arg_list
), K_FUNCTION
, line
, pos
, scope
, parent_kind
);
1048 /* scan until end of function definition */
1051 /* Should end on a new line, parse next token */
1052 advanceToken(lexer
, true, false);
1053 lexer
->first_token
= true;
1055 vStringDelete(name
);
1056 vStringDelete(arg_list
);
1060 * function <ident> ( [<args>] ) [::<type>] [<where>] [<body>] end
1062 static void parseFunction (lexerState
*lexer
, vString
*scope
, int parent_kind
)
1066 vString
*local_scope
;
1067 int local_parent_kind
;
1071 advanceToken(lexer
, true, false);
1072 if (lexer
->cur_token
!= TOKEN_IDENTIFIER
)
1076 else if (lexer
->cur_c
== '.')
1078 local_scope
= vStringNewCopy(lexer
->token_str
);
1079 local_parent_kind
= K_MODULE
;
1081 advanceToken(lexer
, true, false);
1085 local_scope
= vStringNewCopy(scope
);
1086 local_parent_kind
= parent_kind
;
1089 /* Scan for parametric type constructor */
1090 skipWhitespace(lexer
, false);
1091 if (lexer
->cur_c
== '{')
1093 scanCurlyBlock(lexer
);
1094 skipWhitespace(lexer
, false);
1097 name
= vStringNewCopy(lexer
->token_str
);
1098 arg_list
= vStringNew();
1102 advanceToken(lexer
, true, false);
1103 if (lexer
->cur_token
== TOKEN_PAREN_BLOCK
)
1105 vStringCopy(arg_list
, lexer
->token_str
);
1107 /* scan potential type casting */
1108 advanceToken(lexer
, true, false);
1109 if (lexer
->cur_token
== TOKEN_TYPE_ANNOTATION
)
1111 vStringCat(arg_list
, lexer
->token_str
);
1112 advanceToken(lexer
, true, false);
1114 /* scan potential type union with 'where' */
1115 if (lexer
->cur_token
== TOKEN_TYPE_WHERE
)
1117 vStringPut(arg_list
, ' ');
1118 vStringCat(arg_list
, lexer
->token_str
);
1119 advanceToken(lexer
, true, false);
1122 addTag(name
, NULL
, vStringValue(arg_list
), K_FUNCTION
, line
, pos
, local_scope
, local_parent_kind
);
1123 addToScope(scope
, name
);
1124 parseExpr(lexer
, true, K_FUNCTION
, scope
);
1126 else if (lexer
->cur_token
== TOKEN_CLOSE_BLOCK
)
1128 /* Function without method */
1129 addTag(name
, NULL
, NULL
, K_FUNCTION
, line
, pos
, local_scope
, local_parent_kind
);
1130 /* Go to the closing 'end' keyword */
1131 skipUntilEnd(lexer
);
1134 vStringDelete(name
);
1135 vStringDelete(arg_list
);
1136 vStringDelete(local_scope
);
1142 static void parseMacro (lexerState
*lexer
, vString
*scope
, int parent_kind
)
1148 advanceToken(lexer
, true, false);
1149 if (lexer
->cur_token
!= TOKEN_IDENTIFIER
)
1154 name
= vStringNewCopy(lexer
->token_str
);
1158 advanceToken(lexer
, true, false);
1159 if (lexer
->cur_token
== TOKEN_PAREN_BLOCK
)
1161 addTag(name
, NULL
, vStringValue(lexer
->token_str
), K_MACRO
, line
, pos
, scope
, parent_kind
);
1164 skipUntilEnd(lexer
);
1165 vStringDelete(name
);
1171 static void parseConst (lexerState
*lexer
, vString
*scope
, int parent_kind
)
1175 advanceToken(lexer
, true, false);
1176 if (lexer
->cur_token
!= TOKEN_IDENTIFIER
)
1181 name
= vStringNewCopy(lexer
->token_str
);
1183 advanceToken(lexer
, true, false);
1184 if (lexer
->cur_token
== TOKEN_TYPE_ANNOTATION
)
1186 addTag(name
, "const", vStringValue(lexer
->token_str
), K_CONSTANT
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1187 advanceToken(lexer
, true, false);
1191 addTag(name
, "const", NULL
, K_CONSTANT
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1194 vStringDelete(name
);
1198 * [ "abstract" | "primitive" ] "type" <ident>
/* Handles a type declaration: reads the token after the keyword, tags it
 * with kind K_TYPE under the given scope, then skips to the end of the
 * block.
 *   scope       - name of the enclosing scope
 *   parent_kind - kind of the enclosing scope, forwarded to addTag() */
1200 static void parseType (lexerState
*lexer
, vString
*scope
, int parent_kind
)
/* Move to the token that should hold the type name. */
1202 advanceToken(lexer
, true, false);
/* NOTE(review): the statement guarded by this test is not present in this
 * view; presumably the function gives up when no identifier follows. */
1203 if (lexer
->cur_token
!= TOKEN_IDENTIFIER
)
/* The tag has neither signature nor type text. */
1208 addTag(lexer
->token_str
, NULL
, NULL
, K_TYPE
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1210 skipUntilEnd(lexer
);
1214 * [ "baremodule" | "module" ] <ident>
/* Handles a module declaration: tags the module name with kind K_MODULE,
 * appends the name to `scope`, and parses the module contents with
 * parseExpr() using K_MODULE as the kind.
 *   scope       - name of the enclosing scope; the module name is appended
 *   parent_kind - kind of the enclosing scope, forwarded to addTag() */
1216 static void parseModule (lexerState
*lexer
, vString
*scope
, int parent_kind
)
/* Move to the token that should hold the module name. */
1218 advanceToken(lexer
, true, false);
/* NOTE(review): the statement guarded by this test is not present in this
 * view; presumably the function gives up when no identifier follows. */
1219 if (lexer
->cur_token
!= TOKEN_IDENTIFIER
)
/* The tag is created before the scope is extended, so it is recorded
 * under the enclosing scope. */
1224 addTag(lexer
->token_str
, NULL
, NULL
, K_MODULE
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1225 addToScope(scope
, lexer
->token_str
);
1226 advanceToken(lexer
, true, false);
/* Parse the module body with `delim` set to true. */
1227 parseExpr(lexer
, true, K_MODULE
, scope
);
1231 * Parse comma separated entity in import/using expressions. An entity could be
1232 * in the form of "Module" or "Module.symbol". The lexer should be at the end
1233 * of "Module", and this function will take it to the end of the entity
1234 * (whitespaces also skipped).
1236 static void parseImportEntity (lexerState
*lexer
, vString
*scope
, int token_type
, int parent_kind
)
1238 if (lexer
->cur_c
== '.')
1240 if (token_type
== TOKEN_IMPORT
)
1242 vString
*module_name
= vStringNewCopy(lexer
->token_str
);
1243 addReferenceTag(module_name
, K_MODULE
, JULIA_MODULE_NAMESPACE
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1245 advanceToken(lexer
, true, false);
1246 addReferenceTag(lexer
->token_str
, K_UNKNOWN
, JULIA_UNKNOWN_IMPORTED
, lexer
->line
, lexer
->pos
, module_name
, K_MODULE
);
1247 vStringDelete(module_name
);
1249 else /* if (token_type == TOKEN_USING) */
1251 /* using Module.symbol is invalid, so we advance the lexer but don't tag it. */
1253 advanceToken(lexer
, true, false);
1258 if (token_type
== TOKEN_IMPORT
)
1260 addReferenceTag(lexer
->token_str
, K_MODULE
, JULIA_MODULE_IMPORTED
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1262 else /* if (token_type == TOKEN_USING) */
1264 addReferenceTag(lexer
->token_str
, K_MODULE
, JULIA_MODULE_USED
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1269 /* Parse import/using expressions with a colon, like: */
1270 /* import Module: symbol1, symbol2 */
1271 /* using Module: symbol1, symbol2 */
1272 /* The lexer should be at the end of "Module", and this function will take it
1273 * to the end of the token after this expression (whitespaces also skipped). */
1274 static void parseColonImportExpr (lexerState
*lexer
, vString
*scope
, int token_type
, int parent_kind
)
1277 if (token_type
== TOKEN_IMPORT
)
1279 symbol_role
= JULIA_UNKNOWN_IMPORTED
;
1281 else /* if (token_type == TOKEN_USING) */
1283 symbol_role
= JULIA_UNKNOWN_USED
;
1285 vString
*name
= vStringNewCopy(lexer
->token_str
);
1286 addReferenceTag(name
, K_MODULE
, JULIA_MODULE_NAMESPACE
, lexer
->line
, lexer
->pos
, scope
, parent_kind
);
1288 advanceToken(lexer
, true, false);
1289 if (lexer
->cur_token
== TOKEN_NEWLINE
)
1291 advanceToken(lexer
, true, false);
1293 while (lexer
->cur_token
== TOKEN_IDENTIFIER
|| lexer
->cur_token
== TOKEN_MACROCALL
)
1295 addReferenceTag(lexer
->token_str
, K_UNKNOWN
, symbol_role
, lexer
->line
, lexer
->pos
, name
, K_MODULE
);
1296 if (lexer
->cur_c
== ',')
1299 advanceToken(lexer
, true, false);
1300 if (lexer
->cur_token
== TOKEN_NEWLINE
)
1302 advanceToken(lexer
, true, false);
1307 advanceToken(lexer
, true, false);
1310 vStringDelete(name
);
1314 * [ "import" | "using" ] <ident> [: <name>]
1316 static void parseImport (lexerState
*lexer
, vString
*scope
, int token_type
, int parent_kind
)
1318 /* capture the imported name */
1319 advanceToken(lexer
, true, false);
1320 /* import Mod1: symbol1, symbol2 */
1321 /* using Mod1: symbol1, symbol2 */
1322 if (lexer
->cur_c
== ':')
1324 parseColonImportExpr(lexer
, scope
, token_type
, parent_kind
);
1326 /* All other situations, like import/using Mod1, Mod2.symbol1, Mod3... */
1329 while (lexer
->cur_token
== TOKEN_IDENTIFIER
|| lexer
->cur_token
== TOKEN_MACROCALL
)
1331 parseImportEntity(lexer
, scope
, token_type
, parent_kind
);
1332 if (lexer
->cur_c
== ',')
1335 advanceToken(lexer
, true, false);
1336 if (lexer
->cur_token
== TOKEN_NEWLINE
)
1338 advanceToken(lexer
, true, false);
1343 advanceToken(lexer
, true, false);
1350 * "struct" <ident>[{<param>}] [<:<type>]; <fields> <inner constructor> end
1352 static void parseStruct (lexerState
*lexer
, vString
*scope
, int parent_kind
)
1356 size_t old_scope_len
;
1360 advanceToken(lexer
, true, false);
1361 if (lexer
->cur_token
!= TOKEN_IDENTIFIER
)
1366 name
= vStringNewCopy(lexer
->token_str
);
1367 field
= vStringNew();
1371 /* scan parametrization */
1372 advanceToken(lexer
, true, false);
1373 if (lexer
->cur_token
== TOKEN_CURLY_BLOCK
)
1375 addTag(name
, NULL
, vStringValue(lexer
->token_str
), K_STRUCT
, line
, pos
, scope
, parent_kind
);
1376 advanceToken(lexer
, true, false);
1380 addTag(name
, NULL
, NULL
, K_STRUCT
, line
, pos
, scope
, parent_kind
);
1382 addToScope(scope
, name
);
1384 /* skip inheritance */
1385 if (lexer
->cur_token
== TOKEN_TYPE_ANNOTATION
)
1387 advanceToken(lexer
, true, false);
1390 /* keep the struct scope in memory to reset it after parsing constructors */
1391 old_scope_len
= vStringLength(scope
);
1392 /* Parse fields and inner constructors */
1393 while (lexer
->cur_token
!= TOKEN_EOF
&& lexer
->cur_token
!= TOKEN_CLOSE_BLOCK
)
1395 if (lexer
->cur_token
== TOKEN_IDENTIFIER
&& lexer
->first_token
)
1397 if (strcmp(vStringValue(lexer
->token_str
), vStringValue(name
)) == 0)
1399 /* inner constructor */
1400 parseShortFunction(lexer
, scope
, K_STRUCT
);
1404 vStringCopy(field
, lexer
->token_str
);
1406 /* parse type annotation */
1407 advanceToken(lexer
, true, false);
1408 if (lexer
->cur_token
== TOKEN_TYPE_ANNOTATION
)
1410 addTag(field
, NULL
, vStringValue(lexer
->token_str
), K_FIELD
, lexer
->line
, lexer
->pos
, scope
, K_STRUCT
);
1411 advanceToken(lexer
, true, false);
1415 addTag(field
, NULL
, NULL
, K_FIELD
, lexer
->line
, lexer
->pos
, scope
, K_STRUCT
);
1418 else if (lexer
->cur_token
== TOKEN_FUNCTION
)
1420 /* inner constructor */
1421 parseFunction(lexer
, scope
, K_STRUCT
);
1425 /* Get next token */
1426 advanceToken(lexer
, true, false);
1428 resetScope(scope
, old_scope_len
);
1431 vStringDelete(name
);
1432 vStringDelete(field
);
1436 static void parseExpr (lexerState
*lexer
, bool delim
, int kind
, vString
*scope
)
1439 size_t old_scope_len
;
1440 vString
*local_scope
= NULL
;
1442 while (lexer
->cur_token
!= TOKEN_EOF
)
1444 old_scope_len
= vStringLength(scope
);
1445 /* Advance token and update if this is a new line */
1446 while (lexer
->cur_token
== TOKEN_NEWLINE
||
1447 lexer
->cur_token
== TOKEN_SEMICOLON
||
1448 lexer
->cur_token
== TOKEN_NONE
)
1450 advanceToken(lexer
, true, false);
1453 /* Make sure every case advances the token
1454 * otherwise we can be stuck in infinite loop */
1455 switch (lexer
->cur_token
)
1458 parseConst(lexer
, scope
, kind
);
1460 case TOKEN_FUNCTION
:
1461 parseFunction(lexer
, scope
, kind
);
1464 parseMacro(lexer
, scope
, kind
);
1467 parseModule(lexer
, scope
, kind
);
1470 parseStruct(lexer
, scope
, kind
);
1473 parseType(lexer
, scope
, kind
);
1476 parseImport(lexer
, scope
, TOKEN_IMPORT
, kind
);
1479 parseImport(lexer
, scope
, TOKEN_USING
, kind
);
1480 case TOKEN_IDENTIFIER
:
1481 if (lexer
->first_token
&& lexer
->cur_c
== '.')
1483 if (local_scope
== NULL
)
1485 local_scope
= vStringNew();
1487 vStringCopy(local_scope
, lexer
->token_str
);
1489 // next token, but keep the first_token value
1490 advanceToken(lexer
, true, true);
1491 skipWhitespace(lexer
, false);
1492 if (lexer
->cur_c
== '(')
1494 parseShortFunction(lexer
, local_scope
, K_MODULE
);
1499 skipWhitespace(lexer
, false);
1500 if (lexer
->first_token
&& (lexer
->cur_c
== '(' || lexer
->cur_c
== '{'))
1502 parseShortFunction(lexer
, scope
, kind
);
1506 advanceToken(lexer
, true, false);
1510 case TOKEN_OPEN_BLOCK
:
1512 advanceToken(lexer
, true, false);
1514 case TOKEN_CLOSE_BLOCK
:
1516 advanceToken(lexer
, true, false);
1519 advanceToken(lexer
, true, false);
1522 resetScope(scope
, old_scope_len
);
1523 if (delim
&& level
<= 0)
1528 vStringDelete(local_scope
);
/* Parser entry point (installed as def->parser by JuliaParser()): parses
 * the input with an initially empty scope and no parent kind, then frees
 * the scope string and the lexer's resources.
 * NOTE(review): the declaration and initialisation of `lexer` are on lines
 * not present in this view. */
1531 static void findJuliaTags (void)
/* Top-level scope starts out empty. */
1534 vString
* scope
= vStringNew();
/* `delim` is false and the kind is K_NONE for the top level. */
1537 parseExpr(&lexer
, false, K_NONE
, scope
);
1538 vStringDelete(scope
);
1540 deInitLexer(&lexer
);
1543 extern parserDefinition
* JuliaParser (void)
1545 static const char *const extensions
[] = { "jl", NULL
};
1546 parserDefinition
* def
= parserNew ("Julia");
1547 def
->kindTable
= JuliaKinds
;
1548 def
->kindCount
= ARRAY_SIZE (JuliaKinds
);
1549 def
->extensions
= extensions
;
1550 def
->parser
= findJuliaTags
;
1551 def
->keywordTable
= JuliaKeywordTable
;
1552 def
->keywordCount
= ARRAY_SIZE (JuliaKeywordTable
);