manual: added documentation about replacement of 'untitled.ext' with filename (#1804)
[geany-mirror.git] / ctags / parsers / go.c
blob2b56e54cfbeba80771aba9f7c4fc80586e20896b
/*
 * This source code is released for free distribution under the terms of the
 * GNU General Public License version 2 or (at your option) any later version.
 */

/*
 * INCLUDE FILES
 */
7 #include "general.h" /* must always come first */
9 #include "debug.h"
10 #include "entry.h"
11 #include "keyword.h"
12 #include "read.h"
13 #include "main.h"
14 #include "routines.h"
15 #include "vstring.h"
16 #include "options.h"
17 #include "xtag.h"
/*
 * MACROS
 */
/* Signature recording stops once the recorded text reaches this length. */
#define MAX_SIGNATURE_LENGTH 512
/* Non-zero when the token pointed to by `token` has token type `t`. */
#define isType(token,t) ((bool) ((token)->type == (t)))
/* Non-zero when the token pointed to by `token` carries keyword id `k`. */
#define isKeyword(token,k) ((bool) ((token)->keyword == (k)))
/*
 * DATA DECLARATIONS
 */
/* Ids of the Go keywords this parser distinguishes (see GoKeywordTable). */
enum eKeywordId {
	KEYWORD_package,
	KEYWORD_import,
	KEYWORD_const,
	KEYWORD_type,
	KEYWORD_var,
	KEYWORD_func,
	KEYWORD_struct,
	KEYWORD_interface,
	KEYWORD_map,
	KEYWORD_chan
};

/* Plain int rather than the enum type, to allow KEYWORD_NONE. */
typedef int keywordId;
/* Lexical token classes produced by readToken(). */
typedef enum eTokenType {
	TOKEN_NONE = -1,	/* no token read yet */
	TOKEN_OTHER,		/* token not important for top-level Go parsing */
	TOKEN_KEYWORD,		/* identifier found in the keyword table */
	TOKEN_IDENTIFIER,	/* identifier that is not a known keyword */
	TOKEN_STRING,		/* "..." , '...' or `...` literal */
	TOKEN_OPEN_PAREN,
	TOKEN_CLOSE_PAREN,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_SEMICOLON,	/* explicit ';' or one injected at a newline */
	TOKEN_STAR,
	TOKEN_LEFT_ARROW,	/* "<-" */
	TOKEN_DOT,
	TOKEN_COMMA,
	TOKEN_EOF
} tokenType;
65 typedef struct sTokenInfo {
66 tokenType type;
67 keywordId keyword;
68 vString *string; /* the name of the token */
69 unsigned long lineNumber; /* line number of tag */
70 MIOPos filePosition; /* file position of line containing name */
71 } tokenInfo;
/*
 * DATA DEFINITIONS
 */
77 static int Lang_go;
78 static vString *scope;
79 static vString *signature = NULL;
/* Tag kinds; every non-negative value is used as an index into GoKinds[]. */
typedef enum {
	GOTAG_UNDEFINED = -1,	/* "no kind", e.g. a tag without a parent */
	GOTAG_PACKAGE,
	GOTAG_FUNCTION,
	GOTAG_CONST,
	GOTAG_TYPE,
	GOTAG_VAR,
	GOTAG_STRUCT,
	GOTAG_INTERFACE,
	GOTAG_MEMBER
} goKind;
93 static kindOption GoKinds[] = {
94 {true, 'p', "package", "packages"},
95 {true, 'f', "func", "functions"},
96 {true, 'c', "const", "constants"},
97 {true, 't', "type", "types"},
98 {true, 'v', "var", "variables"},
99 {true, 's', "struct", "structs"},
100 {true, 'i', "interface", "interfaces"},
101 {true, 'm', "member", "struct members"}
104 static const keywordTable GoKeywordTable[] = {
105 {"package", KEYWORD_package},
106 {"import", KEYWORD_import},
107 {"const", KEYWORD_const},
108 {"type", KEYWORD_type},
109 {"var", KEYWORD_var},
110 {"func", KEYWORD_func},
111 {"struct", KEYWORD_struct},
112 {"interface", KEYWORD_interface},
113 {"map", KEYWORD_map},
114 {"chan", KEYWORD_chan}
/*
 * FUNCTION DEFINITIONS
 */
/*
 * Returns true when the byte `c` may begin a Go identifier: an ASCII
 * letter, an underscore, or any byte with the high bit set.
 *
 * XXX UTF-8: the check is byte-wise, not code-point-wise. Every byte of a
 * multi-byte UTF-8 sequence (lead or continuation) is >= 0x80, and
 * isIdentChar() relies on this function for the bytes after the first one,
 * so 0x80 itself has to be accepted. Testing `c > 128` instead would cut an
 * identifier such as "À" (bytes 0xC3 0x80) short at the 0x80 byte.
 * EOF is negative and is therefore rejected.
 */
static bool isStartIdentChar (const int c)
{
	return (bool)
		(isalpha (c) || c == '_' || c >= 0x80);
}
/*
 * Returns true when the byte `c` may appear inside a Go identifier, i.e. it
 * is a decimal digit or is accepted by isStartIdentChar().
 */
static bool isIdentChar (const int c)
{
	if (isStartIdentChar (c))
		return true;
	return (bool) (isdigit (c) != 0);
}
134 static void initialize (const langType language)
136 Lang_go = language;
139 static tokenInfo *newToken (void)
141 tokenInfo *const token = xMalloc (1, tokenInfo);
142 token->type = TOKEN_NONE;
143 token->keyword = KEYWORD_NONE;
144 token->string = vStringNew ();
145 token->lineNumber = getInputLineNumber ();
146 token->filePosition = getInputFilePosition ();
147 return token;
150 static tokenInfo *copyToken (tokenInfo *other)
152 tokenInfo *const token = xMalloc (1, tokenInfo);
153 token->type = other->type;
154 token->keyword = other->keyword;
155 token->string = vStringNewCopy (other->string);
156 token->lineNumber = other->lineNumber;
157 token->filePosition = other->filePosition;
158 return token;
161 static void deleteToken (tokenInfo * const token)
163 if (token != NULL)
165 vStringDelete (token->string);
166 eFree (token);
/*
 * Parsing functions
 */
174 static void parseString (vString *const string, const int delimiter)
176 bool end = false;
177 while (!end)
179 int c = getcFromInputFile ();
180 if (c == EOF)
181 end = true;
182 else if (c == '\\' && delimiter != '`')
184 c = getcFromInputFile ();
185 if (c != '\'' && c != '\"')
186 vStringPut (string, '\\');
187 vStringPut (string, c);
189 else if (c == delimiter)
190 end = true;
191 else
192 vStringPut (string, c);
196 static void parseIdentifier (vString *const string, const int firstChar)
198 int c = firstChar;
201 vStringPut (string, c);
202 c = getcFromInputFile ();
203 } while (isIdentChar (c));
204 ungetcToInputFile (c); /* always unget, LF might add a semicolon */
207 static void readToken (tokenInfo *const token)
209 int c;
210 static tokenType lastTokenType = TOKEN_NONE;
211 bool firstWhitespace = true;
212 bool whitespace;
214 token->type = TOKEN_NONE;
215 token->keyword = KEYWORD_NONE;
216 vStringClear (token->string);
218 getNextChar:
221 c = getcFromInputFile ();
222 token->lineNumber = getInputLineNumber ();
223 token->filePosition = getInputFilePosition ();
224 if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
225 lastTokenType == TOKEN_STRING ||
226 lastTokenType == TOKEN_OTHER ||
227 lastTokenType == TOKEN_CLOSE_PAREN ||
228 lastTokenType == TOKEN_CLOSE_CURLY ||
229 lastTokenType == TOKEN_CLOSE_SQUARE))
231 c = ';'; // semicolon injection
233 whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n';
234 if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
236 firstWhitespace = false;
237 vStringPut(signature, ' ');
240 while (whitespace);
242 switch (c)
244 case EOF:
245 token->type = TOKEN_EOF;
246 break;
248 case ';':
249 token->type = TOKEN_SEMICOLON;
250 break;
252 case '/':
254 bool hasNewline = false;
255 int d = getcFromInputFile ();
256 switch (d)
258 case '/':
259 skipToCharacterInInputFile ('\n');
260 /* Line comments start with the
261 * character sequence // and
262 * continue through the next
263 * newline. A line comment acts
264 * like a newline. */
265 ungetcToInputFile ('\n');
266 goto getNextChar;
267 case '*':
272 d = getcFromInputFile ();
273 if (d == '\n')
275 hasNewline = true;
277 } while (d != EOF && d != '*');
279 c = getcFromInputFile ();
280 if (c == '/')
281 break;
282 else
283 ungetcToInputFile (c);
284 } while (c != EOF && c != '\0');
286 ungetcToInputFile (hasNewline ? '\n' : ' ');
287 goto getNextChar;
288 default:
289 token->type = TOKEN_OTHER;
290 ungetcToInputFile (d);
291 break;
294 break;
296 case '"':
297 case '\'':
298 case '`':
299 token->type = TOKEN_STRING;
300 parseString (token->string, c);
301 token->lineNumber = getInputLineNumber ();
302 token->filePosition = getInputFilePosition ();
303 break;
305 case '<':
307 int d = getcFromInputFile ();
308 if (d == '-')
309 token->type = TOKEN_LEFT_ARROW;
310 else
312 ungetcToInputFile (d);
313 token->type = TOKEN_OTHER;
316 break;
318 case '(':
319 token->type = TOKEN_OPEN_PAREN;
320 break;
322 case ')':
323 token->type = TOKEN_CLOSE_PAREN;
324 break;
326 case '{':
327 token->type = TOKEN_OPEN_CURLY;
328 break;
330 case '}':
331 token->type = TOKEN_CLOSE_CURLY;
332 break;
334 case '[':
335 token->type = TOKEN_OPEN_SQUARE;
336 break;
338 case ']':
339 token->type = TOKEN_CLOSE_SQUARE;
340 break;
342 case '*':
343 token->type = TOKEN_STAR;
344 break;
346 case '.':
347 token->type = TOKEN_DOT;
348 break;
350 case ',':
351 token->type = TOKEN_COMMA;
352 break;
354 default:
355 if (isStartIdentChar (c))
357 parseIdentifier (token->string, c);
358 token->lineNumber = getInputLineNumber ();
359 token->filePosition = getInputFilePosition ();
360 token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
361 if (isKeyword (token, KEYWORD_NONE))
362 token->type = TOKEN_IDENTIFIER;
363 else
364 token->type = TOKEN_KEYWORD;
366 else
367 token->type = TOKEN_OTHER;
368 break;
371 if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
373 if (token->type == TOKEN_LEFT_ARROW)
374 vStringCatS(signature, "<-");
375 else if (token->type == TOKEN_STRING)
377 // only struct member annotations can appear in function prototypes
378 // so only `` type strings are possible
379 vStringPut(signature, '`');
380 vStringCat(signature, token->string);
381 vStringPut(signature, '`');
383 else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
384 vStringCat(signature, token->string);
385 else if (c != EOF)
386 vStringPut(signature, c);
389 lastTokenType = token->type;
392 static bool skipToMatchedNoRead (tokenInfo *const token)
394 int nest_level = 0;
395 tokenType open_token = token->type;
396 tokenType close_token;
398 switch (open_token)
400 case TOKEN_OPEN_PAREN:
401 close_token = TOKEN_CLOSE_PAREN;
402 break;
403 case TOKEN_OPEN_CURLY:
404 close_token = TOKEN_CLOSE_CURLY;
405 break;
406 case TOKEN_OPEN_SQUARE:
407 close_token = TOKEN_CLOSE_SQUARE;
408 break;
409 default:
410 return false;
414 * This routine will skip to a matching closing token.
415 * It will also handle nested tokens.
417 nest_level++;
418 while (nest_level > 0 && !isType (token, TOKEN_EOF))
420 readToken (token);
421 if (isType (token, open_token))
422 nest_level++;
423 else if (isType (token, close_token))
424 nest_level--;
427 return true;
430 static void skipToMatched (tokenInfo *const token)
432 if (skipToMatchedNoRead (token))
433 readToken (token);
436 static bool skipType (tokenInfo *const token)
438 // Type = TypeName | TypeLit | "(" Type ")" .
439 // Skips also function multiple return values "(" Type {"," Type} ")"
440 if (isType (token, TOKEN_OPEN_PAREN))
442 skipToMatched (token);
443 return true;
446 // TypeName = QualifiedIdent.
447 // QualifiedIdent = [ PackageName "." ] identifier .
448 // PackageName = identifier .
449 if (isType (token, TOKEN_IDENTIFIER))
451 readToken (token);
452 if (isType (token, TOKEN_DOT))
454 readToken (token);
455 if (isType (token, TOKEN_IDENTIFIER))
456 readToken (token);
458 return true;
461 // StructType = "struct" "{" { FieldDecl ";" } "}"
462 // InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
463 if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface))
465 readToken (token);
466 // skip over "{}"
467 skipToMatched (token);
468 return true;
471 // ArrayType = "[" ArrayLength "]" ElementType .
472 // SliceType = "[" "]" ElementType .
473 // ElementType = Type .
474 if (isType (token, TOKEN_OPEN_SQUARE))
476 skipToMatched (token);
477 return skipType (token);
480 // PointerType = "*" BaseType .
481 // BaseType = Type .
482 // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
483 if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW))
485 readToken (token);
486 return skipType (token);
489 // MapType = "map" "[" KeyType "]" ElementType .
490 // KeyType = Type .
491 if (isKeyword (token, KEYWORD_map))
493 readToken (token);
494 // skip over "[]"
495 skipToMatched (token);
496 return skipType (token);
499 // FunctionType = "func" Signature .
500 // Signature = Parameters [ Result ] .
501 // Result = Parameters | Type .
502 // Parameters = "(" [ ParameterList [ "," ] ] ")" .
503 if (isKeyword (token, KEYWORD_func))
505 readToken (token);
506 // Parameters, skip over "()"
507 skipToMatched (token);
508 // Result is parameters or type or nothing. skipType treats anything
509 // surrounded by parentheses as a type, and does nothing if what
510 // follows is not a type.
511 return skipType (token);
514 return false;
517 static void makeTag (tokenInfo *const token, const goKind kind,
518 tokenInfo *const parent_token, const goKind parent_kind,
519 const char *argList, const char *varType)
521 const char *const name = vStringValue (token->string);
523 tagEntryInfo e;
524 initTagEntry (&e, name, &(GoKinds [kind]));
526 if (!GoKinds [kind].enabled)
527 return;
529 e.lineNumber = token->lineNumber;
530 e.filePosition = token->filePosition;
531 if (argList)
532 e.extensionFields.signature = argList;
533 if (varType)
534 e.extensionFields.varType = varType;
536 if (parent_kind != GOTAG_UNDEFINED && parent_token != NULL)
538 e.extensionFields.scopeKind = &(GoKinds[parent_kind]);
539 e.extensionFields.scopeName = vStringValue (parent_token->string);
541 makeTagEntry (&e);
543 if (scope && isXtagEnabled(XTAG_QUALIFIED_TAGS))
545 vString *qualifiedName = vStringNew ();
546 vStringCopy (qualifiedName, scope);
547 vStringCatS (qualifiedName, ".");
548 vStringCat (qualifiedName, token->string);
549 e.name = vStringValue (qualifiedName);
550 makeTagEntry (&e);
551 vStringDelete (qualifiedName);
555 static void parsePackage (tokenInfo *const token)
557 readToken (token);
558 if (isType (token, TOKEN_IDENTIFIER))
560 makeTag (token, GOTAG_PACKAGE, NULL, GOTAG_UNDEFINED, NULL, NULL);
561 if (!scope && isXtagEnabled(XTAG_QUALIFIED_TAGS))
563 scope = vStringNew ();
564 vStringCopy (scope, token->string);
569 static void parseFunctionOrMethod (tokenInfo *const token)
571 // FunctionDecl = "func" identifier Signature [ Body ] .
572 // Body = Block.
574 // MethodDecl = "func" Receiver MethodName Signature [ Body ] .
575 // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
576 // BaseTypeName = identifier .
578 // Skip over receiver.
579 readToken (token);
580 if (isType (token, TOKEN_OPEN_PAREN))
581 skipToMatched (token);
583 if (isType (token, TOKEN_IDENTIFIER))
585 vString *argList;
586 tokenInfo *functionToken = copyToken (token);
588 // Start recording signature
589 signature = vStringNew ();
591 // Skip over parameters.
592 readToken (token);
593 skipToMatchedNoRead (token);
595 vStringStripLeading (signature);
596 vStringStripTrailing (signature);
597 argList = signature;
598 signature = vStringNew ();
600 readToken (token);
602 // Skip over result.
603 skipType (token);
605 // Remove the extra { we have just read
606 vStringStripTrailing (signature);
607 vStringChop (signature);
609 vStringStripLeading (signature);
610 vStringStripTrailing (signature);
611 makeTag (functionToken, GOTAG_FUNCTION, NULL, GOTAG_UNDEFINED, argList->buffer, signature->buffer);
612 deleteToken (functionToken);
613 vStringDelete(signature);
614 vStringDelete(argList);
616 // Stop recording signature
617 signature = NULL;
619 // Skip over function body.
620 if (isType (token, TOKEN_OPEN_CURLY))
621 skipToMatched (token);
625 static void parseStructMembers (tokenInfo *const token, tokenInfo *const parent_token)
627 // StructType = "struct" "{" { FieldDecl ";" } "}" .
628 // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] .
629 // AnonymousField = [ "*" ] TypeName .
630 // Tag = string_lit .
632 readToken (token);
633 if (!isType (token, TOKEN_OPEN_CURLY))
634 return;
636 readToken (token);
637 while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY))
639 tokenInfo *memberCandidate = NULL;
640 bool first = true;
642 while (!isType (token, TOKEN_EOF))
644 if (isType (token, TOKEN_IDENTIFIER))
646 if (first)
648 // could be anonymous field like in 'struct {int}' - we don't know yet
649 memberCandidate = copyToken (token);
650 first = false;
652 else
654 if (memberCandidate)
656 // if we are here, there was a comma and memberCandidate isn't an anonymous field
657 makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL);
658 deleteToken (memberCandidate);
659 memberCandidate = NULL;
661 makeTag (token, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL);
663 readToken (token);
665 if (!isType (token, TOKEN_COMMA))
666 break;
667 readToken (token);
670 // in the case of an anonymous field, we already read part of the
671 // type into memberCandidate and skipType() should return false so no tag should
672 // be generated in this case.
673 if (skipType (token) && memberCandidate)
674 makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL);
676 if (memberCandidate)
677 deleteToken (memberCandidate);
679 while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY)
680 && !isType (token, TOKEN_EOF))
682 readToken (token);
683 skipToMatched (token);
686 if (!isType (token, TOKEN_CLOSE_CURLY))
688 // we are at TOKEN_SEMICOLON
689 readToken (token);
694 static void parseConstTypeVar (tokenInfo *const token, goKind kind)
696 // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
697 // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] .
698 // IdentifierList = identifier { "," identifier } .
699 // ExpressionList = Expression { "," Expression } .
700 // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
701 // TypeSpec = identifier Type .
702 // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
703 // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
704 bool usesParens = false;
706 readToken (token);
708 if (isType (token, TOKEN_OPEN_PAREN))
710 usesParens = true;
711 readToken (token);
716 tokenInfo *typeToken = NULL;
718 while (!isType (token, TOKEN_EOF))
720 if (isType (token, TOKEN_IDENTIFIER))
722 if (kind == GOTAG_TYPE)
724 typeToken = copyToken (token);
725 readToken (token);
726 if (isKeyword (token, KEYWORD_struct))
727 makeTag (typeToken, GOTAG_STRUCT, NULL, GOTAG_UNDEFINED, NULL, NULL);
728 else if (isKeyword (token, KEYWORD_interface))
729 makeTag (typeToken, GOTAG_INTERFACE, NULL, GOTAG_UNDEFINED, NULL, NULL);
730 else
731 makeTag (typeToken, kind, NULL, GOTAG_UNDEFINED, NULL, NULL);
732 break;
734 else
735 makeTag (token, kind, NULL, GOTAG_UNDEFINED, NULL, NULL);
736 readToken (token);
738 if (!isType (token, TOKEN_COMMA))
739 break;
740 readToken (token);
743 if (typeToken)
745 if (isKeyword (token, KEYWORD_struct))
746 parseStructMembers (token, typeToken);
747 else
748 skipType (token);
749 deleteToken (typeToken);
751 else
752 skipType (token);
754 while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN)
755 && !isType (token, TOKEN_EOF))
757 readToken (token);
758 skipToMatched (token);
761 if (usesParens && !isType (token, TOKEN_CLOSE_PAREN))
763 // we are at TOKEN_SEMICOLON
764 readToken (token);
767 while (!isType (token, TOKEN_EOF) &&
768 usesParens && !isType (token, TOKEN_CLOSE_PAREN));
771 static void parseGoFile (tokenInfo *const token)
775 readToken (token);
777 if (isType (token, TOKEN_KEYWORD))
779 switch (token->keyword)
781 case KEYWORD_package:
782 parsePackage (token);
783 break;
784 case KEYWORD_func:
785 parseFunctionOrMethod (token);
786 break;
787 case KEYWORD_const:
788 parseConstTypeVar (token, GOTAG_CONST);
789 break;
790 case KEYWORD_type:
791 parseConstTypeVar (token, GOTAG_TYPE);
792 break;
793 case KEYWORD_var:
794 parseConstTypeVar (token, GOTAG_VAR);
795 break;
796 default:
797 break;
800 else if (isType (token, TOKEN_OPEN_PAREN) || isType (token, TOKEN_OPEN_CURLY) ||
801 isType (token, TOKEN_OPEN_SQUARE))
803 skipToMatched (token);
805 } while (token->type != TOKEN_EOF);
808 static void findGoTags (void)
810 tokenInfo *const token = newToken ();
812 parseGoFile (token);
814 deleteToken (token);
815 vStringDelete (scope);
816 scope = NULL;
819 extern parserDefinition *GoParser (void)
821 static const char *const extensions[] = { "go", NULL };
822 parserDefinition *def = parserNew ("Go");
823 def->kinds = GoKinds;
824 def->kindCount = ARRAY_SIZE (GoKinds);
825 def->extensions = extensions;
826 def->parser = findGoTags;
827 def->initialize = initialize;
828 def->keywordTable = GoKeywordTable;
829 def->keywordCount = ARRAY_SIZE (GoKeywordTable);
830 return def;