ctags: Rename Geany-specific tagEntryInfo::arglist to upstream's ::signature
[geany-mirror.git] / ctags / parsers / go.c
blob6b1a88d61999722aa06288303b8ed8032a1cfd6d
/*
 *   INCLUDE FILES
 */
4 #include "general.h" /* must always come first */
6 #include "entry.h"
7 #include "keyword.h"
8 #include "read.h"
9 #include "main.h"
10 #include "vstring.h"
11 #include "options.h"
/*
 *   MACROS
 */
/* readToken() stops appending to `signature` once its length reaches this. */
#define MAX_SIGNATURE_LENGTH 512

/* TRUE when the token's type equals `t`. */
#define isType(token,t) (boolean) ((token)->type == (t))

/* TRUE when the token's keyword id equals `k`. */
#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
/*
 *   DATA DECLARATIONS
 */
/* Identifiers of the Go keywords this parser distinguishes.
 * KEYWORD_NONE marks a token that is not one of them. */
typedef enum eKeywordId {
	KEYWORD_NONE = -1,
	KEYWORD_package,
	KEYWORD_import,
	KEYWORD_const,
	KEYWORD_type,
	KEYWORD_var,
	KEYWORD_func,
	KEYWORD_struct,
	KEYWORD_interface,
	KEYWORD_map,
	KEYWORD_chan
} keywordId;
38 /* Used to determine whether keyword is valid for the current language and
39 * what its ID is.
41 typedef struct sKeywordDesc {
42 const char *name;
43 keywordId id;
44 } keywordDesc;
/* Lexical token classes produced by readToken(). */
typedef enum eTokenType {
	TOKEN_NONE = -1,
	/* Token not important for top-level Go parsing */
	TOKEN_OTHER,
	TOKEN_KEYWORD,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_OPEN_PAREN,
	TOKEN_CLOSE_PAREN,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_SEMICOLON,
	TOKEN_STAR,
	TOKEN_LEFT_ARROW,
	TOKEN_DOT,
	TOKEN_COMMA,
	TOKEN_EOF
} tokenType;
67 typedef struct sTokenInfo {
68 tokenType type;
69 keywordId keyword;
70 vString *string; /* the name of the token */
71 unsigned long lineNumber; /* line number of tag */
72 MIOPos filePosition; /* file position of line containing name */
73 } tokenInfo;
/*
 *   DATA DEFINITIONS
 */
79 static int Lang_go;
80 static vString *scope;
81 static vString *signature = NULL;
/* Tag kinds emitted by this parser. Each non-negative value is used as an
 * index into GoKinds[]. */
typedef enum {
	GOTAG_UNDEFINED = -1,
	GOTAG_PACKAGE,
	GOTAG_FUNCTION,
	GOTAG_CONST,
	GOTAG_TYPE,
	GOTAG_VAR,
	GOTAG_STRUCT,
	GOTAG_INTERFACE,
	GOTAG_MEMBER
} goKind;
95 static kindOption GoKinds[] = {
96 {TRUE, 'p', "package", "packages"},
97 {TRUE, 'f', "func", "functions"},
98 {TRUE, 'c', "const", "constants"},
99 {TRUE, 't', "type", "types"},
100 {TRUE, 'v', "var", "variables"},
101 {TRUE, 's', "struct", "structs"},
102 {TRUE, 'i', "interface", "interfaces"},
103 {TRUE, 'm', "member", "struct members"}
106 static keywordDesc GoKeywordTable[] = {
107 {"package", KEYWORD_package},
108 {"import", KEYWORD_import},
109 {"const", KEYWORD_const},
110 {"type", KEYWORD_type},
111 {"var", KEYWORD_var},
112 {"func", KEYWORD_func},
113 {"struct", KEYWORD_struct},
114 {"interface", KEYWORD_interface},
115 {"map", KEYWORD_map},
116 {"chan", KEYWORD_chan}
/*
 *   FUNCTION DEFINITIONS
 */
123 // XXX UTF-8
124 static boolean isStartIdentChar (const int c)
126 return (boolean)
127 (isalpha (c) || c == '_' || c > 128);
130 static boolean isIdentChar (const int c)
132 return (boolean)
133 (isStartIdentChar (c) || isdigit (c));
136 static void initialize (const langType language)
138 size_t i;
139 const size_t count =
140 sizeof (GoKeywordTable) / sizeof (GoKeywordTable[0]);
141 Lang_go = language;
142 for (i = 0; i < count; ++i)
144 const keywordDesc *const p = &GoKeywordTable[i];
145 addKeyword (p->name, language, (int) p->id);
149 static tokenInfo *newToken (void)
151 tokenInfo *const token = xMalloc (1, tokenInfo);
152 token->type = TOKEN_NONE;
153 token->keyword = KEYWORD_NONE;
154 token->string = vStringNew ();
155 token->lineNumber = getSourceLineNumber ();
156 token->filePosition = getInputFilePosition ();
157 return token;
160 static tokenInfo *copyToken (tokenInfo *other)
162 tokenInfo *const token = xMalloc (1, tokenInfo);
163 token->type = other->type;
164 token->keyword = other->keyword;
165 token->string = vStringNewCopy (other->string);
166 token->lineNumber = other->lineNumber;
167 token->filePosition = other->filePosition;
168 return token;
171 static void deleteToken (tokenInfo * const token)
173 if (token != NULL)
175 vStringDelete (token->string);
176 eFree (token);
/*
 *   Parsing functions
 */
184 static void parseString (vString *const string, const int delimiter)
186 boolean end = FALSE;
187 while (!end)
189 int c = fileGetc ();
190 if (c == EOF)
191 end = TRUE;
192 else if (c == '\\' && delimiter != '`')
194 c = fileGetc ();
195 if (c != '\'' && c != '\"')
196 vStringPut (string, '\\');
197 vStringPut (string, c);
199 else if (c == delimiter)
200 end = TRUE;
201 else
202 vStringPut (string, c);
204 vStringTerminate (string);
207 static void parseIdentifier (vString *const string, const int firstChar)
209 int c = firstChar;
212 vStringPut (string, c);
213 c = fileGetc ();
214 } while (isIdentChar (c));
215 vStringTerminate (string);
216 fileUngetc (c); /* always unget, LF might add a semicolon */
219 static void readToken (tokenInfo *const token)
221 int c;
222 static tokenType lastTokenType = TOKEN_NONE;
223 boolean firstWhitespace = TRUE;
224 boolean whitespace;
226 token->type = TOKEN_NONE;
227 token->keyword = KEYWORD_NONE;
228 vStringClear (token->string);
230 getNextChar:
233 c = fileGetc ();
234 token->lineNumber = getSourceLineNumber ();
235 token->filePosition = getInputFilePosition ();
236 if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
237 lastTokenType == TOKEN_STRING ||
238 lastTokenType == TOKEN_OTHER ||
239 lastTokenType == TOKEN_CLOSE_PAREN ||
240 lastTokenType == TOKEN_CLOSE_CURLY ||
241 lastTokenType == TOKEN_CLOSE_SQUARE))
243 c = ';'; // semicolon injection
245 whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n';
246 if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
248 firstWhitespace = FALSE;
249 vStringPut(signature, ' ');
252 while (whitespace);
254 switch (c)
256 case EOF:
257 token->type = TOKEN_EOF;
258 break;
260 case ';':
261 token->type = TOKEN_SEMICOLON;
262 break;
264 case '/':
266 boolean hasNewline = FALSE;
267 int d = fileGetc ();
268 switch (d)
270 case '/':
271 fileSkipToCharacter ('\n');
272 /* Line comments start with the
273 * character sequence // and
274 * continue through the next
275 * newline. A line comment acts
276 * like a newline. */
277 fileUngetc ('\n');
278 goto getNextChar;
279 case '*':
284 d = fileGetc ();
285 if (d == '\n')
287 hasNewline = TRUE;
289 } while (d != EOF && d != '*');
291 c = fileGetc ();
292 if (c == '/')
293 break;
294 else
295 fileUngetc (c);
296 } while (c != EOF && c != '\0');
298 fileUngetc (hasNewline ? '\n' : ' ');
299 goto getNextChar;
300 default:
301 token->type = TOKEN_OTHER;
302 fileUngetc (d);
303 break;
306 break;
308 case '"':
309 case '\'':
310 case '`':
311 token->type = TOKEN_STRING;
312 parseString (token->string, c);
313 token->lineNumber = getSourceLineNumber ();
314 token->filePosition = getInputFilePosition ();
315 break;
317 case '<':
319 int d = fileGetc ();
320 if (d == '-')
321 token->type = TOKEN_LEFT_ARROW;
322 else
324 fileUngetc (d);
325 token->type = TOKEN_OTHER;
328 break;
330 case '(':
331 token->type = TOKEN_OPEN_PAREN;
332 break;
334 case ')':
335 token->type = TOKEN_CLOSE_PAREN;
336 break;
338 case '{':
339 token->type = TOKEN_OPEN_CURLY;
340 break;
342 case '}':
343 token->type = TOKEN_CLOSE_CURLY;
344 break;
346 case '[':
347 token->type = TOKEN_OPEN_SQUARE;
348 break;
350 case ']':
351 token->type = TOKEN_CLOSE_SQUARE;
352 break;
354 case '*':
355 token->type = TOKEN_STAR;
356 break;
358 case '.':
359 token->type = TOKEN_DOT;
360 break;
362 case ',':
363 token->type = TOKEN_COMMA;
364 break;
366 default:
367 if (isStartIdentChar (c))
369 parseIdentifier (token->string, c);
370 token->lineNumber = getSourceLineNumber ();
371 token->filePosition = getInputFilePosition ();
372 token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
373 if (isKeyword (token, KEYWORD_NONE))
374 token->type = TOKEN_IDENTIFIER;
375 else
376 token->type = TOKEN_KEYWORD;
378 else
379 token->type = TOKEN_OTHER;
380 break;
383 if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
385 if (token->type == TOKEN_LEFT_ARROW)
386 vStringCatS(signature, "<-");
387 else if (token->type == TOKEN_STRING)
389 // only struct member annotations can appear in function prototypes
390 // so only `` type strings are possible
391 vStringPut(signature, '`');
392 vStringCat(signature, token->string);
393 vStringPut(signature, '`');
395 else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
396 vStringCat(signature, token->string);
397 else if (c != EOF)
398 vStringPut(signature, c);
401 lastTokenType = token->type;
404 static boolean skipToMatchedNoRead (tokenInfo *const token)
406 int nest_level = 0;
407 tokenType open_token = token->type;
408 tokenType close_token;
410 switch (open_token)
412 case TOKEN_OPEN_PAREN:
413 close_token = TOKEN_CLOSE_PAREN;
414 break;
415 case TOKEN_OPEN_CURLY:
416 close_token = TOKEN_CLOSE_CURLY;
417 break;
418 case TOKEN_OPEN_SQUARE:
419 close_token = TOKEN_CLOSE_SQUARE;
420 break;
421 default:
422 return FALSE;
426 * This routine will skip to a matching closing token.
427 * It will also handle nested tokens.
429 nest_level++;
430 while (nest_level > 0 && !isType (token, TOKEN_EOF))
432 readToken (token);
433 if (isType (token, open_token))
434 nest_level++;
435 else if (isType (token, close_token))
436 nest_level--;
439 return TRUE;
442 static void skipToMatched (tokenInfo *const token)
444 if (skipToMatchedNoRead (token))
445 readToken (token);
448 static boolean skipType (tokenInfo *const token)
450 // Type = TypeName | TypeLit | "(" Type ")" .
451 // Skips also function multiple return values "(" Type {"," Type} ")"
452 if (isType (token, TOKEN_OPEN_PAREN))
454 skipToMatched (token);
455 return TRUE;
458 // TypeName = QualifiedIdent.
459 // QualifiedIdent = [ PackageName "." ] identifier .
460 // PackageName = identifier .
461 if (isType (token, TOKEN_IDENTIFIER))
463 readToken (token);
464 if (isType (token, TOKEN_DOT))
466 readToken (token);
467 if (isType (token, TOKEN_IDENTIFIER))
468 readToken (token);
470 return TRUE;
473 // StructType = "struct" "{" { FieldDecl ";" } "}"
474 // InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
475 if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface))
477 readToken (token);
478 // skip over "{}"
479 skipToMatched (token);
480 return TRUE;
483 // ArrayType = "[" ArrayLength "]" ElementType .
484 // SliceType = "[" "]" ElementType .
485 // ElementType = Type .
486 if (isType (token, TOKEN_OPEN_SQUARE))
488 skipToMatched (token);
489 return skipType (token);
492 // PointerType = "*" BaseType .
493 // BaseType = Type .
494 // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
495 if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW))
497 readToken (token);
498 return skipType (token);
501 // MapType = "map" "[" KeyType "]" ElementType .
502 // KeyType = Type .
503 if (isKeyword (token, KEYWORD_map))
505 readToken (token);
506 // skip over "[]"
507 skipToMatched (token);
508 return skipType (token);
511 // FunctionType = "func" Signature .
512 // Signature = Parameters [ Result ] .
513 // Result = Parameters | Type .
514 // Parameters = "(" [ ParameterList [ "," ] ] ")" .
515 if (isKeyword (token, KEYWORD_func))
517 readToken (token);
518 // Parameters, skip over "()"
519 skipToMatched (token);
520 // Result is parameters or type or nothing. skipType treats anything
521 // surrounded by parentheses as a type, and does nothing if what
522 // follows is not a type.
523 return skipType (token);
526 return FALSE;
529 static void makeTag (tokenInfo *const token, const goKind kind,
530 tokenInfo *const parent_token, const goKind parent_kind,
531 const char *argList, const char *varType)
533 const char *const name = vStringValue (token->string);
535 tagEntryInfo e;
536 initTagEntry (&e, name);
538 if (!GoKinds [kind].enabled)
539 return;
541 e.lineNumber = token->lineNumber;
542 e.filePosition = token->filePosition;
543 e.kindName = GoKinds [kind].name;
544 e.kind = GoKinds [kind].letter;
545 if (argList)
546 e.extensionFields.signature = argList;
547 if (varType)
548 e.extensionFields.varType = varType;
550 if (parent_kind != GOTAG_UNDEFINED && parent_token != NULL)
552 e.extensionFields.scope[0] = GoKinds[parent_kind].name;
553 e.extensionFields.scope[1] = vStringValue (parent_token->string);
555 makeTagEntry (&e);
557 if (scope && Option.include.qualifiedTags)
559 vString *qualifiedName = vStringNew ();
560 vStringCopy (qualifiedName, scope);
561 vStringCatS (qualifiedName, ".");
562 vStringCat (qualifiedName, token->string);
563 e.name = vStringValue (qualifiedName);
564 makeTagEntry (&e);
565 vStringDelete (qualifiedName);
569 static void parsePackage (tokenInfo *const token)
571 readToken (token);
572 if (isType (token, TOKEN_IDENTIFIER))
574 makeTag (token, GOTAG_PACKAGE, NULL, GOTAG_UNDEFINED, NULL, NULL);
575 if (!scope && Option.include.qualifiedTags)
577 scope = vStringNew ();
578 vStringCopy (scope, token->string);
583 static void parseFunctionOrMethod (tokenInfo *const token)
585 // FunctionDecl = "func" identifier Signature [ Body ] .
586 // Body = Block.
588 // MethodDecl = "func" Receiver MethodName Signature [ Body ] .
589 // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
590 // BaseTypeName = identifier .
592 // Skip over receiver.
593 readToken (token);
594 if (isType (token, TOKEN_OPEN_PAREN))
595 skipToMatched (token);
597 if (isType (token, TOKEN_IDENTIFIER))
599 vString *argList;
600 tokenInfo *functionToken = copyToken (token);
602 // Start recording signature
603 signature = vStringNew ();
605 // Skip over parameters.
606 readToken (token);
607 skipToMatchedNoRead (token);
609 vStringStripLeading (signature);
610 vStringStripTrailing (signature);
611 argList = signature;
612 signature = vStringNew ();
614 readToken (token);
616 // Skip over result.
617 skipType (token);
619 // Remove the extra { we have just read
620 vStringStripTrailing (signature);
621 vStringChop (signature);
623 vStringStripLeading (signature);
624 vStringStripTrailing (signature);
625 makeTag (functionToken, GOTAG_FUNCTION, NULL, GOTAG_UNDEFINED, argList->buffer, signature->buffer);
626 deleteToken (functionToken);
627 vStringDelete(signature);
628 vStringDelete(argList);
630 // Stop recording signature
631 signature = NULL;
633 // Skip over function body.
634 if (isType (token, TOKEN_OPEN_CURLY))
635 skipToMatched (token);
639 static void parseStructMembers (tokenInfo *const token, tokenInfo *const parent_token)
641 // StructType = "struct" "{" { FieldDecl ";" } "}" .
642 // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] .
643 // AnonymousField = [ "*" ] TypeName .
644 // Tag = string_lit .
646 readToken (token);
647 if (!isType (token, TOKEN_OPEN_CURLY))
648 return;
650 readToken (token);
651 while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY))
653 tokenInfo *memberCandidate = NULL;
654 boolean first = TRUE;
656 while (!isType (token, TOKEN_EOF))
658 if (isType (token, TOKEN_IDENTIFIER))
660 if (first)
662 // could be anonymous field like in 'struct {int}' - we don't know yet
663 memberCandidate = copyToken (token);
664 first = FALSE;
666 else
668 if (memberCandidate)
670 // if we are here, there was a comma and memberCandidate isn't an anonymous field
671 makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL);
672 deleteToken (memberCandidate);
673 memberCandidate = NULL;
675 makeTag (token, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL);
677 readToken (token);
679 if (!isType (token, TOKEN_COMMA))
680 break;
681 readToken (token);
684 // in the case of an anonymous field, we already read part of the
685 // type into memberCandidate and skipType() should return FALSE so no tag should
686 // be generated in this case.
687 if (skipType (token) && memberCandidate)
688 makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL, NULL);
690 if (memberCandidate)
691 deleteToken (memberCandidate);
693 while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY)
694 && !isType (token, TOKEN_EOF))
696 readToken (token);
697 skipToMatched (token);
700 if (!isType (token, TOKEN_CLOSE_CURLY))
702 // we are at TOKEN_SEMICOLON
703 readToken (token);
708 static void parseConstTypeVar (tokenInfo *const token, goKind kind)
710 // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
711 // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] .
712 // IdentifierList = identifier { "," identifier } .
713 // ExpressionList = Expression { "," Expression } .
714 // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
715 // TypeSpec = identifier Type .
716 // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
717 // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
718 boolean usesParens = FALSE;
720 readToken (token);
722 if (isType (token, TOKEN_OPEN_PAREN))
724 usesParens = TRUE;
725 readToken (token);
730 tokenInfo *typeToken = NULL;
732 while (!isType (token, TOKEN_EOF))
734 if (isType (token, TOKEN_IDENTIFIER))
736 if (kind == GOTAG_TYPE)
738 typeToken = copyToken (token);
739 readToken (token);
740 if (isKeyword (token, KEYWORD_struct))
741 makeTag (typeToken, GOTAG_STRUCT, NULL, GOTAG_UNDEFINED, NULL, NULL);
742 else if (isKeyword (token, KEYWORD_interface))
743 makeTag (typeToken, GOTAG_INTERFACE, NULL, GOTAG_UNDEFINED, NULL, NULL);
744 else
745 makeTag (typeToken, kind, NULL, GOTAG_UNDEFINED, NULL, NULL);
746 break;
748 else
749 makeTag (token, kind, NULL, GOTAG_UNDEFINED, NULL, NULL);
750 readToken (token);
752 if (!isType (token, TOKEN_COMMA))
753 break;
754 readToken (token);
757 if (typeToken)
759 if (isKeyword (token, KEYWORD_struct))
760 parseStructMembers (token, typeToken);
761 else
762 skipType (token);
763 deleteToken (typeToken);
765 else
766 skipType (token);
768 while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN)
769 && !isType (token, TOKEN_EOF))
771 readToken (token);
772 skipToMatched (token);
775 if (usesParens && !isType (token, TOKEN_CLOSE_PAREN))
777 // we are at TOKEN_SEMICOLON
778 readToken (token);
781 while (!isType (token, TOKEN_EOF) &&
782 usesParens && !isType (token, TOKEN_CLOSE_PAREN));
785 static void parseGoFile (tokenInfo *const token)
789 readToken (token);
791 if (isType (token, TOKEN_KEYWORD))
793 switch (token->keyword)
795 case KEYWORD_package:
796 parsePackage (token);
797 break;
798 case KEYWORD_func:
799 parseFunctionOrMethod (token);
800 break;
801 case KEYWORD_const:
802 parseConstTypeVar (token, GOTAG_CONST);
803 break;
804 case KEYWORD_type:
805 parseConstTypeVar (token, GOTAG_TYPE);
806 break;
807 case KEYWORD_var:
808 parseConstTypeVar (token, GOTAG_VAR);
809 break;
810 default:
811 break;
814 else if (isType (token, TOKEN_OPEN_PAREN) || isType (token, TOKEN_OPEN_CURLY) ||
815 isType (token, TOKEN_OPEN_SQUARE))
817 skipToMatched (token);
819 } while (token->type != TOKEN_EOF);
822 static void findGoTags (void)
824 tokenInfo *const token = newToken ();
826 parseGoFile (token);
828 deleteToken (token);
829 vStringDelete (scope);
830 scope = NULL;
833 extern parserDefinition *GoParser (void)
835 static const char *const extensions[] = { "go", NULL };
836 parserDefinition *def = parserNew ("Go");
837 def->kinds = GoKinds;
838 def->kindCount = KIND_COUNT (GoKinds);
839 def->extensions = extensions;
840 def->parser = findGoTags;
841 def->initialize = initialize;
842 return def;