2 * This source code is released for free distribution under the terms of the
3 * GNU General Public License version 2 or (at your option) any later version.
7 #include "general.h" /* must always come first */
22 #define MAX_SIGNATURE_LENGTH 512
23 #define isType(token,t) (bool) ((token)->type == (t))
24 #define isKeyword(token,k) (bool) ((token)->keyword == (k))
42 typedef int keywordId
; /* to allow KEYWORD_NONE */
44 typedef enum eTokenType
{
46 // Token not important for top-level Go parsing
65 typedef struct sTokenInfo
{
68 vString
*string
; /* the name of the token */
69 unsigned long lineNumber
; /* line number of tag */
70 MIOPos filePosition
; /* file position of line containing name */
78 static vString
*scope
;
79 static vString
*signature
= NULL
;
93 static kindOption GoKinds
[] = {
94 {true, 'p', "package", "packages"},
95 {true, 'f', "func", "functions"},
96 {true, 'c', "const", "constants"},
97 {true, 't', "type", "types"},
98 {true, 'v', "var", "variables"},
99 {true, 's', "struct", "structs"},
100 {true, 'i', "interface", "interfaces"},
101 {true, 'm', "member", "struct members"}
104 static const keywordTable GoKeywordTable
[] = {
105 {"package", KEYWORD_package
},
106 {"import", KEYWORD_import
},
107 {"const", KEYWORD_const
},
108 {"type", KEYWORD_type
},
109 {"var", KEYWORD_var
},
110 {"func", KEYWORD_func
},
111 {"struct", KEYWORD_struct
},
112 {"interface", KEYWORD_interface
},
113 {"map", KEYWORD_map
},
114 {"chan", KEYWORD_chan
}
118 * FUNCTION DEFINITIONS
122 static bool isStartIdentChar (const int c
)
125 (isalpha (c
) || c
== '_' || c
> 128);
128 static bool isIdentChar (const int c
)
131 (isStartIdentChar (c
) || isdigit (c
));
134 static void initialize (const langType language
)
139 static tokenInfo
*newToken (void)
141 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
142 token
->type
= TOKEN_NONE
;
143 token
->keyword
= KEYWORD_NONE
;
144 token
->string
= vStringNew ();
145 token
->lineNumber
= getInputLineNumber ();
146 token
->filePosition
= getInputFilePosition ();
150 static tokenInfo
*copyToken (tokenInfo
*other
)
152 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
153 token
->type
= other
->type
;
154 token
->keyword
= other
->keyword
;
155 token
->string
= vStringNewCopy (other
->string
);
156 token
->lineNumber
= other
->lineNumber
;
157 token
->filePosition
= other
->filePosition
;
161 static void deleteToken (tokenInfo
* const token
)
165 vStringDelete (token
->string
);
174 static void parseString (vString
*const string
, const int delimiter
)
179 int c
= getcFromInputFile ();
182 else if (c
== '\\' && delimiter
!= '`')
184 c
= getcFromInputFile ();
185 if (c
!= '\'' && c
!= '\"')
186 vStringPut (string
, '\\');
187 vStringPut (string
, c
);
189 else if (c
== delimiter
)
192 vStringPut (string
, c
);
196 static void parseIdentifier (vString
*const string
, const int firstChar
)
201 vStringPut (string
, c
);
202 c
= getcFromInputFile ();
203 } while (isIdentChar (c
));
204 ungetcToInputFile (c
); /* always unget, LF might add a semicolon */
207 static void readToken (tokenInfo
*const token
)
210 static tokenType lastTokenType
= TOKEN_NONE
;
211 bool firstWhitespace
= true;
214 token
->type
= TOKEN_NONE
;
215 token
->keyword
= KEYWORD_NONE
;
216 vStringClear (token
->string
);
221 c
= getcFromInputFile ();
222 token
->lineNumber
= getInputLineNumber ();
223 token
->filePosition
= getInputFilePosition ();
224 if (c
== '\n' && (lastTokenType
== TOKEN_IDENTIFIER
||
225 lastTokenType
== TOKEN_STRING
||
226 lastTokenType
== TOKEN_OTHER
||
227 lastTokenType
== TOKEN_CLOSE_PAREN
||
228 lastTokenType
== TOKEN_CLOSE_CURLY
||
229 lastTokenType
== TOKEN_CLOSE_SQUARE
))
231 c
= ';'; // semicolon injection
233 whitespace
= c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n';
234 if (signature
&& whitespace
&& firstWhitespace
&& vStringLength (signature
) < MAX_SIGNATURE_LENGTH
)
236 firstWhitespace
= false;
237 vStringPut(signature
, ' ');
245 token
->type
= TOKEN_EOF
;
249 token
->type
= TOKEN_SEMICOLON
;
254 bool hasNewline
= false;
255 int d
= getcFromInputFile ();
259 skipToCharacterInInputFile ('\n');
260 /* Line comments start with the
261 * character sequence // and
262 * continue through the next
263 * newline. A line comment acts
265 ungetcToInputFile ('\n');
272 d
= getcFromInputFile ();
277 } while (d
!= EOF
&& d
!= '*');
279 c
= getcFromInputFile ();
283 ungetcToInputFile (c
);
284 } while (c
!= EOF
&& c
!= '\0');
286 ungetcToInputFile (hasNewline
? '\n' : ' ');
289 token
->type
= TOKEN_OTHER
;
290 ungetcToInputFile (d
);
299 token
->type
= TOKEN_STRING
;
300 parseString (token
->string
, c
);
301 token
->lineNumber
= getInputLineNumber ();
302 token
->filePosition
= getInputFilePosition ();
307 int d
= getcFromInputFile ();
309 token
->type
= TOKEN_LEFT_ARROW
;
312 ungetcToInputFile (d
);
313 token
->type
= TOKEN_OTHER
;
319 token
->type
= TOKEN_OPEN_PAREN
;
323 token
->type
= TOKEN_CLOSE_PAREN
;
327 token
->type
= TOKEN_OPEN_CURLY
;
331 token
->type
= TOKEN_CLOSE_CURLY
;
335 token
->type
= TOKEN_OPEN_SQUARE
;
339 token
->type
= TOKEN_CLOSE_SQUARE
;
343 token
->type
= TOKEN_STAR
;
347 token
->type
= TOKEN_DOT
;
351 token
->type
= TOKEN_COMMA
;
355 if (isStartIdentChar (c
))
357 parseIdentifier (token
->string
, c
);
358 token
->lineNumber
= getInputLineNumber ();
359 token
->filePosition
= getInputFilePosition ();
360 token
->keyword
= lookupKeyword (vStringValue (token
->string
), Lang_go
);
361 if (isKeyword (token
, KEYWORD_NONE
))
362 token
->type
= TOKEN_IDENTIFIER
;
364 token
->type
= TOKEN_KEYWORD
;
367 token
->type
= TOKEN_OTHER
;
371 if (signature
&& vStringLength (signature
) < MAX_SIGNATURE_LENGTH
)
373 if (token
->type
== TOKEN_LEFT_ARROW
)
374 vStringCatS(signature
, "<-");
375 else if (token
->type
== TOKEN_STRING
)
377 // only struct member annotations can appear in function prototypes
378 // so only `` type strings are possible
379 vStringPut(signature
, '`');
380 vStringCat(signature
, token
->string
);
381 vStringPut(signature
, '`');
383 else if (token
->type
== TOKEN_IDENTIFIER
|| token
->type
== TOKEN_KEYWORD
)
384 vStringCat(signature
, token
->string
);
386 vStringPut(signature
, c
);
389 lastTokenType
= token
->type
;
392 static bool skipToMatchedNoRead (tokenInfo
*const token
)
395 tokenType open_token
= token
->type
;
396 tokenType close_token
;
400 case TOKEN_OPEN_PAREN
:
401 close_token
= TOKEN_CLOSE_PAREN
;
403 case TOKEN_OPEN_CURLY
:
404 close_token
= TOKEN_CLOSE_CURLY
;
406 case TOKEN_OPEN_SQUARE
:
407 close_token
= TOKEN_CLOSE_SQUARE
;
414 * This routine will skip to a matching closing token.
415 * It will also handle nested tokens.
418 while (nest_level
> 0 && !isType (token
, TOKEN_EOF
))
421 if (isType (token
, open_token
))
423 else if (isType (token
, close_token
))
430 static void skipToMatched (tokenInfo
*const token
)
432 if (skipToMatchedNoRead (token
))
436 static bool skipType (tokenInfo
*const token
)
438 // Type = TypeName | TypeLit | "(" Type ")" .
439 // Skips also function multiple return values "(" Type {"," Type} ")"
440 if (isType (token
, TOKEN_OPEN_PAREN
))
442 skipToMatched (token
);
446 // TypeName = QualifiedIdent.
447 // QualifiedIdent = [ PackageName "." ] identifier .
448 // PackageName = identifier .
449 if (isType (token
, TOKEN_IDENTIFIER
))
452 if (isType (token
, TOKEN_DOT
))
455 if (isType (token
, TOKEN_IDENTIFIER
))
461 // StructType = "struct" "{" { FieldDecl ";" } "}"
462 // InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
463 if (isKeyword (token
, KEYWORD_struct
) || isKeyword (token
, KEYWORD_interface
))
467 skipToMatched (token
);
471 // ArrayType = "[" ArrayLength "]" ElementType .
472 // SliceType = "[" "]" ElementType .
473 // ElementType = Type .
474 if (isType (token
, TOKEN_OPEN_SQUARE
))
476 skipToMatched (token
);
477 return skipType (token
);
480 // PointerType = "*" BaseType .
482 // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
483 if (isType (token
, TOKEN_STAR
) || isKeyword (token
, KEYWORD_chan
) || isType (token
, TOKEN_LEFT_ARROW
))
486 return skipType (token
);
489 // MapType = "map" "[" KeyType "]" ElementType .
491 if (isKeyword (token
, KEYWORD_map
))
495 skipToMatched (token
);
496 return skipType (token
);
499 // FunctionType = "func" Signature .
500 // Signature = Parameters [ Result ] .
501 // Result = Parameters | Type .
502 // Parameters = "(" [ ParameterList [ "," ] ] ")" .
503 if (isKeyword (token
, KEYWORD_func
))
506 // Parameters, skip over "()"
507 skipToMatched (token
);
508 // Result is parameters or type or nothing. skipType treats anything
509 // surrounded by parentheses as a type, and does nothing if what
510 // follows is not a type.
511 return skipType (token
);
517 static void makeTag (tokenInfo
*const token
, const goKind kind
,
518 tokenInfo
*const parent_token
, const goKind parent_kind
,
519 const char *argList
, const char *varType
)
521 const char *const name
= vStringValue (token
->string
);
524 initTagEntry (&e
, name
, &(GoKinds
[kind
]));
526 if (!GoKinds
[kind
].enabled
)
529 e
.lineNumber
= token
->lineNumber
;
530 e
.filePosition
= token
->filePosition
;
532 e
.extensionFields
.signature
= argList
;
534 e
.extensionFields
.varType
= varType
;
536 if (parent_kind
!= GOTAG_UNDEFINED
&& parent_token
!= NULL
)
538 e
.extensionFields
.scopeKind
= &(GoKinds
[parent_kind
]);
539 e
.extensionFields
.scopeName
= vStringValue (parent_token
->string
);
543 if (scope
&& isXtagEnabled(XTAG_QUALIFIED_TAGS
))
545 vString
*qualifiedName
= vStringNew ();
546 vStringCopy (qualifiedName
, scope
);
547 vStringCatS (qualifiedName
, ".");
548 vStringCat (qualifiedName
, token
->string
);
549 e
.name
= vStringValue (qualifiedName
);
551 vStringDelete (qualifiedName
);
555 static void parsePackage (tokenInfo
*const token
)
558 if (isType (token
, TOKEN_IDENTIFIER
))
560 makeTag (token
, GOTAG_PACKAGE
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
561 if (!scope
&& isXtagEnabled(XTAG_QUALIFIED_TAGS
))
563 scope
= vStringNew ();
564 vStringCopy (scope
, token
->string
);
569 static void parseFunctionOrMethod (tokenInfo
*const token
)
571 // FunctionDecl = "func" identifier Signature [ Body ] .
574 // MethodDecl = "func" Receiver MethodName Signature [ Body ] .
575 // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
576 // BaseTypeName = identifier .
578 // Skip over receiver.
580 if (isType (token
, TOKEN_OPEN_PAREN
))
581 skipToMatched (token
);
583 if (isType (token
, TOKEN_IDENTIFIER
))
586 tokenInfo
*functionToken
= copyToken (token
);
588 // Start recording signature
589 signature
= vStringNew ();
591 // Skip over parameters.
593 skipToMatchedNoRead (token
);
595 vStringStripLeading (signature
);
596 vStringStripTrailing (signature
);
598 signature
= vStringNew ();
605 // Remove the extra { we have just read
606 vStringStripTrailing (signature
);
607 vStringChop (signature
);
609 vStringStripLeading (signature
);
610 vStringStripTrailing (signature
);
611 makeTag (functionToken
, GOTAG_FUNCTION
, NULL
, GOTAG_UNDEFINED
, argList
->buffer
, signature
->buffer
);
612 deleteToken (functionToken
);
613 vStringDelete(signature
);
614 vStringDelete(argList
);
616 // Stop recording signature
619 // Skip over function body.
620 if (isType (token
, TOKEN_OPEN_CURLY
))
621 skipToMatched (token
);
625 static void parseStructMembers (tokenInfo
*const token
, tokenInfo
*const parent_token
)
627 // StructType = "struct" "{" { FieldDecl ";" } "}" .
628 // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] .
629 // AnonymousField = [ "*" ] TypeName .
630 // Tag = string_lit .
633 if (!isType (token
, TOKEN_OPEN_CURLY
))
637 while (!isType (token
, TOKEN_EOF
) && !isType (token
, TOKEN_CLOSE_CURLY
))
639 tokenInfo
*memberCandidate
= NULL
;
642 while (!isType (token
, TOKEN_EOF
))
644 if (isType (token
, TOKEN_IDENTIFIER
))
648 // could be anonymous field like in 'struct {int}' - we don't know yet
649 memberCandidate
= copyToken (token
);
656 // if we are here, there was a comma and memberCandidate isn't an anonymous field
657 makeTag (memberCandidate
, GOTAG_MEMBER
, parent_token
, GOTAG_STRUCT
, NULL
, NULL
);
658 deleteToken (memberCandidate
);
659 memberCandidate
= NULL
;
661 makeTag (token
, GOTAG_MEMBER
, parent_token
, GOTAG_STRUCT
, NULL
, NULL
);
665 if (!isType (token
, TOKEN_COMMA
))
670 // in the case of an anonymous field, we already read part of the
671 // type into memberCandidate and skipType() should return false so no tag should
672 // be generated in this case.
673 if (skipType (token
) && memberCandidate
)
674 makeTag (memberCandidate
, GOTAG_MEMBER
, parent_token
, GOTAG_STRUCT
, NULL
, NULL
);
677 deleteToken (memberCandidate
);
679 while (!isType (token
, TOKEN_SEMICOLON
) && !isType (token
, TOKEN_CLOSE_CURLY
)
680 && !isType (token
, TOKEN_EOF
))
683 skipToMatched (token
);
686 if (!isType (token
, TOKEN_CLOSE_CURLY
))
688 // we are at TOKEN_SEMICOLON
694 static void parseConstTypeVar (tokenInfo
*const token
, goKind kind
)
696 // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
697 // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] .
698 // IdentifierList = identifier { "," identifier } .
699 // ExpressionList = Expression { "," Expression } .
700 // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
701 // TypeSpec = identifier Type .
702 // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
703 // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
704 bool usesParens
= false;
708 if (isType (token
, TOKEN_OPEN_PAREN
))
716 tokenInfo
*typeToken
= NULL
;
718 while (!isType (token
, TOKEN_EOF
))
720 if (isType (token
, TOKEN_IDENTIFIER
))
722 if (kind
== GOTAG_TYPE
)
724 typeToken
= copyToken (token
);
726 if (isKeyword (token
, KEYWORD_struct
))
727 makeTag (typeToken
, GOTAG_STRUCT
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
728 else if (isKeyword (token
, KEYWORD_interface
))
729 makeTag (typeToken
, GOTAG_INTERFACE
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
731 makeTag (typeToken
, kind
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
735 makeTag (token
, kind
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
738 if (!isType (token
, TOKEN_COMMA
))
745 if (isKeyword (token
, KEYWORD_struct
))
746 parseStructMembers (token
, typeToken
);
749 deleteToken (typeToken
);
754 while (!isType (token
, TOKEN_SEMICOLON
) && !isType (token
, TOKEN_CLOSE_PAREN
)
755 && !isType (token
, TOKEN_EOF
))
758 skipToMatched (token
);
761 if (usesParens
&& !isType (token
, TOKEN_CLOSE_PAREN
))
763 // we are at TOKEN_SEMICOLON
767 while (!isType (token
, TOKEN_EOF
) &&
768 usesParens
&& !isType (token
, TOKEN_CLOSE_PAREN
));
771 static void parseGoFile (tokenInfo
*const token
)
777 if (isType (token
, TOKEN_KEYWORD
))
779 switch (token
->keyword
)
781 case KEYWORD_package
:
782 parsePackage (token
);
785 parseFunctionOrMethod (token
);
788 parseConstTypeVar (token
, GOTAG_CONST
);
791 parseConstTypeVar (token
, GOTAG_TYPE
);
794 parseConstTypeVar (token
, GOTAG_VAR
);
800 else if (isType (token
, TOKEN_OPEN_PAREN
) || isType (token
, TOKEN_OPEN_CURLY
) ||
801 isType (token
, TOKEN_OPEN_SQUARE
))
803 skipToMatched (token
);
805 } while (token
->type
!= TOKEN_EOF
);
808 static void findGoTags (void)
810 tokenInfo
*const token
= newToken ();
815 vStringDelete (scope
);
819 extern parserDefinition
*GoParser (void)
821 static const char *const extensions
[] = { "go", NULL
};
822 parserDefinition
*def
= parserNew ("Go");
823 def
->kinds
= GoKinds
;
824 def
->kindCount
= ARRAY_SIZE (GoKinds
);
825 def
->extensions
= extensions
;
826 def
->parser
= findGoTags
;
827 def
->initialize
= initialize
;
828 def
->keywordTable
= GoKeywordTable
;
829 def
->keywordCount
= ARRAY_SIZE (GoKeywordTable
);