4 #include "general.h" /* must always come first */
16 #define MAX_SIGNATURE_LENGTH 512
17 #define isType(token,t) (boolean) ((token)->type == (t))
18 #define isKeyword(token,k) (boolean) ((token)->keyword == (k))
24 typedef enum eKeywordId
{
38 /* Used to determine whether keyword is valid for the current language and
41 typedef struct sKeywordDesc
{
46 typedef enum eTokenType
{
48 // Token not important for top-level Go parsing
67 typedef struct sTokenInfo
{
70 vString
*string
; /* the name of the token */
71 unsigned long lineNumber
; /* line number of tag */
72 MIOPos filePosition
; /* file position of line containing name */
80 static vString
*scope
;
81 static vString
*signature
= NULL
;
95 static kindOption GoKinds
[] = {
96 {TRUE
, 'p', "package", "packages"},
97 {TRUE
, 'f', "func", "functions"},
98 {TRUE
, 'c', "const", "constants"},
99 {TRUE
, 't', "type", "types"},
100 {TRUE
, 'v', "var", "variables"},
101 {TRUE
, 's', "struct", "structs"},
102 {TRUE
, 'i', "interface", "interfaces"},
103 {TRUE
, 'm', "member", "struct members"}
106 static keywordDesc GoKeywordTable
[] = {
107 {"package", KEYWORD_package
},
108 {"import", KEYWORD_import
},
109 {"const", KEYWORD_const
},
110 {"type", KEYWORD_type
},
111 {"var", KEYWORD_var
},
112 {"func", KEYWORD_func
},
113 {"struct", KEYWORD_struct
},
114 {"interface", KEYWORD_interface
},
115 {"map", KEYWORD_map
},
116 {"chan", KEYWORD_chan
}
120 * FUNCTION DEFINITIONS
124 static boolean
isStartIdentChar (const int c
)
127 (isalpha (c
) || c
== '_' || c
> 128);
130 static boolean
isIdentChar (const int c
)
133 (isStartIdentChar (c
) || isdigit (c
));
136 static void initialize (const langType language
)
140 sizeof (GoKeywordTable
) / sizeof (GoKeywordTable
[0]);
142 for (i
= 0; i
< count
; ++i
)
144 const keywordDesc
*const p
= &GoKeywordTable
[i
];
145 addKeyword (p
->name
, language
, (int) p
->id
);
149 static tokenInfo
*newToken (void)
151 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
152 token
->type
= TOKEN_NONE
;
153 token
->keyword
= KEYWORD_NONE
;
154 token
->string
= vStringNew ();
155 token
->lineNumber
= getSourceLineNumber ();
156 token
->filePosition
= getInputFilePosition ();
160 static tokenInfo
*copyToken (tokenInfo
*other
)
162 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
163 token
->type
= other
->type
;
164 token
->keyword
= other
->keyword
;
165 token
->string
= vStringNewCopy (other
->string
);
166 token
->lineNumber
= other
->lineNumber
;
167 token
->filePosition
= other
->filePosition
;
171 static void deleteToken (tokenInfo
* const token
)
175 vStringDelete (token
->string
);
184 static void parseString (vString
*const string
, const int delimiter
)
192 else if (c
== '\\' && delimiter
!= '`')
195 if (c
!= '\'' && c
!= '\"')
196 vStringPut (string
, '\\');
197 vStringPut (string
, c
);
199 else if (c
== delimiter
)
202 vStringPut (string
, c
);
204 vStringTerminate (string
);
207 static void parseIdentifier (vString
*const string
, const int firstChar
)
212 vStringPut (string
, c
);
214 } while (isIdentChar (c
));
215 vStringTerminate (string
);
216 fileUngetc (c
); /* always unget, LF might add a semicolon */
219 static void readToken (tokenInfo
*const token
)
222 static tokenType lastTokenType
= TOKEN_NONE
;
223 boolean firstWhitespace
= TRUE
;
226 token
->type
= TOKEN_NONE
;
227 token
->keyword
= KEYWORD_NONE
;
228 vStringClear (token
->string
);
234 token
->lineNumber
= getSourceLineNumber ();
235 token
->filePosition
= getInputFilePosition ();
236 if (c
== '\n' && (lastTokenType
== TOKEN_IDENTIFIER
||
237 lastTokenType
== TOKEN_STRING
||
238 lastTokenType
== TOKEN_OTHER
||
239 lastTokenType
== TOKEN_CLOSE_PAREN
||
240 lastTokenType
== TOKEN_CLOSE_CURLY
||
241 lastTokenType
== TOKEN_CLOSE_SQUARE
))
243 c
= ';'; // semicolon injection
245 whitespace
= c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n';
246 if (signature
&& whitespace
&& firstWhitespace
&& vStringLength (signature
) < MAX_SIGNATURE_LENGTH
)
248 firstWhitespace
= FALSE
;
249 vStringPut(signature
, ' ');
257 token
->type
= TOKEN_EOF
;
261 token
->type
= TOKEN_SEMICOLON
;
266 boolean hasNewline
= FALSE
;
271 fileSkipToCharacter ('\n');
272 /* Line comments start with the
273 * character sequence // and
274 * continue through the next
275 * newline. A line comment acts
289 } while (d
!= EOF
&& d
!= '*');
296 } while (c
!= EOF
&& c
!= '\0');
298 fileUngetc (hasNewline
? '\n' : ' ');
301 token
->type
= TOKEN_OTHER
;
311 token
->type
= TOKEN_STRING
;
312 parseString (token
->string
, c
);
313 token
->lineNumber
= getSourceLineNumber ();
314 token
->filePosition
= getInputFilePosition ();
321 token
->type
= TOKEN_LEFT_ARROW
;
325 token
->type
= TOKEN_OTHER
;
331 token
->type
= TOKEN_OPEN_PAREN
;
335 token
->type
= TOKEN_CLOSE_PAREN
;
339 token
->type
= TOKEN_OPEN_CURLY
;
343 token
->type
= TOKEN_CLOSE_CURLY
;
347 token
->type
= TOKEN_OPEN_SQUARE
;
351 token
->type
= TOKEN_CLOSE_SQUARE
;
355 token
->type
= TOKEN_STAR
;
359 token
->type
= TOKEN_DOT
;
363 token
->type
= TOKEN_COMMA
;
367 if (isStartIdentChar (c
))
369 parseIdentifier (token
->string
, c
);
370 token
->lineNumber
= getSourceLineNumber ();
371 token
->filePosition
= getInputFilePosition ();
372 token
->keyword
= lookupKeyword (vStringValue (token
->string
), Lang_go
);
373 if (isKeyword (token
, KEYWORD_NONE
))
374 token
->type
= TOKEN_IDENTIFIER
;
376 token
->type
= TOKEN_KEYWORD
;
379 token
->type
= TOKEN_OTHER
;
383 if (signature
&& vStringLength (signature
) < MAX_SIGNATURE_LENGTH
)
385 if (token
->type
== TOKEN_LEFT_ARROW
)
386 vStringCatS(signature
, "<-");
387 else if (token
->type
== TOKEN_STRING
)
389 // only struct member annotations can appear in function prototypes
390 // so only `` type strings are possible
391 vStringPut(signature
, '`');
392 vStringCat(signature
, token
->string
);
393 vStringPut(signature
, '`');
395 else if (token
->type
== TOKEN_IDENTIFIER
|| token
->type
== TOKEN_KEYWORD
)
396 vStringCat(signature
, token
->string
);
398 vStringPut(signature
, c
);
401 lastTokenType
= token
->type
;
404 static boolean
skipToMatchedNoRead (tokenInfo
*const token
)
407 tokenType open_token
= token
->type
;
408 tokenType close_token
;
412 case TOKEN_OPEN_PAREN
:
413 close_token
= TOKEN_CLOSE_PAREN
;
415 case TOKEN_OPEN_CURLY
:
416 close_token
= TOKEN_CLOSE_CURLY
;
418 case TOKEN_OPEN_SQUARE
:
419 close_token
= TOKEN_CLOSE_SQUARE
;
426 * This routine will skip to a matching closing token.
427 * It will also handle nested tokens.
430 while (nest_level
> 0 && !isType (token
, TOKEN_EOF
))
433 if (isType (token
, open_token
))
435 else if (isType (token
, close_token
))
442 static void skipToMatched (tokenInfo
*const token
)
444 if (skipToMatchedNoRead (token
))
448 static boolean
skipType (tokenInfo
*const token
)
450 // Type = TypeName | TypeLit | "(" Type ")" .
451 // Skips also function multiple return values "(" Type {"," Type} ")"
452 if (isType (token
, TOKEN_OPEN_PAREN
))
454 skipToMatched (token
);
458 // TypeName = QualifiedIdent.
459 // QualifiedIdent = [ PackageName "." ] identifier .
460 // PackageName = identifier .
461 if (isType (token
, TOKEN_IDENTIFIER
))
464 if (isType (token
, TOKEN_DOT
))
467 if (isType (token
, TOKEN_IDENTIFIER
))
473 // StructType = "struct" "{" { FieldDecl ";" } "}"
474 // InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
475 if (isKeyword (token
, KEYWORD_struct
) || isKeyword (token
, KEYWORD_interface
))
479 skipToMatched (token
);
483 // ArrayType = "[" ArrayLength "]" ElementType .
484 // SliceType = "[" "]" ElementType .
485 // ElementType = Type .
486 if (isType (token
, TOKEN_OPEN_SQUARE
))
488 skipToMatched (token
);
489 return skipType (token
);
492 // PointerType = "*" BaseType .
494 // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
495 if (isType (token
, TOKEN_STAR
) || isKeyword (token
, KEYWORD_chan
) || isType (token
, TOKEN_LEFT_ARROW
))
498 return skipType (token
);
501 // MapType = "map" "[" KeyType "]" ElementType .
503 if (isKeyword (token
, KEYWORD_map
))
507 skipToMatched (token
);
508 return skipType (token
);
511 // FunctionType = "func" Signature .
512 // Signature = Parameters [ Result ] .
513 // Result = Parameters | Type .
514 // Parameters = "(" [ ParameterList [ "," ] ] ")" .
515 if (isKeyword (token
, KEYWORD_func
))
518 // Parameters, skip over "()"
519 skipToMatched (token
);
520 // Result is parameters or type or nothing. skipType treats anything
521 // surrounded by parentheses as a type, and does nothing if what
522 // follows is not a type.
523 return skipType (token
);
529 static void makeTag (tokenInfo
*const token
, const goKind kind
,
530 tokenInfo
*const parent_token
, const goKind parent_kind
,
531 const char *argList
, const char *varType
)
533 const char *const name
= vStringValue (token
->string
);
536 initTagEntry (&e
, name
);
538 if (!GoKinds
[kind
].enabled
)
541 e
.lineNumber
= token
->lineNumber
;
542 e
.filePosition
= token
->filePosition
;
543 e
.kindName
= GoKinds
[kind
].name
;
544 e
.kind
= GoKinds
[kind
].letter
;
546 e
.extensionFields
.signature
= argList
;
548 e
.extensionFields
.varType
= varType
;
550 if (parent_kind
!= GOTAG_UNDEFINED
&& parent_token
!= NULL
)
552 e
.extensionFields
.scope
[0] = GoKinds
[parent_kind
].name
;
553 e
.extensionFields
.scope
[1] = vStringValue (parent_token
->string
);
557 if (scope
&& Option
.include
.qualifiedTags
)
559 vString
*qualifiedName
= vStringNew ();
560 vStringCopy (qualifiedName
, scope
);
561 vStringCatS (qualifiedName
, ".");
562 vStringCat (qualifiedName
, token
->string
);
563 e
.name
= vStringValue (qualifiedName
);
565 vStringDelete (qualifiedName
);
569 static void parsePackage (tokenInfo
*const token
)
572 if (isType (token
, TOKEN_IDENTIFIER
))
574 makeTag (token
, GOTAG_PACKAGE
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
575 if (!scope
&& Option
.include
.qualifiedTags
)
577 scope
= vStringNew ();
578 vStringCopy (scope
, token
->string
);
583 static void parseFunctionOrMethod (tokenInfo
*const token
)
585 // FunctionDecl = "func" identifier Signature [ Body ] .
588 // MethodDecl = "func" Receiver MethodName Signature [ Body ] .
589 // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
590 // BaseTypeName = identifier .
592 // Skip over receiver.
594 if (isType (token
, TOKEN_OPEN_PAREN
))
595 skipToMatched (token
);
597 if (isType (token
, TOKEN_IDENTIFIER
))
600 tokenInfo
*functionToken
= copyToken (token
);
602 // Start recording signature
603 signature
= vStringNew ();
605 // Skip over parameters.
607 skipToMatchedNoRead (token
);
609 vStringStripLeading (signature
);
610 vStringStripTrailing (signature
);
612 signature
= vStringNew ();
619 // Remove the extra { we have just read
620 vStringStripTrailing (signature
);
621 vStringChop (signature
);
623 vStringStripLeading (signature
);
624 vStringStripTrailing (signature
);
625 makeTag (functionToken
, GOTAG_FUNCTION
, NULL
, GOTAG_UNDEFINED
, argList
->buffer
, signature
->buffer
);
626 deleteToken (functionToken
);
627 vStringDelete(signature
);
628 vStringDelete(argList
);
630 // Stop recording signature
633 // Skip over function body.
634 if (isType (token
, TOKEN_OPEN_CURLY
))
635 skipToMatched (token
);
639 static void parseStructMembers (tokenInfo
*const token
, tokenInfo
*const parent_token
)
641 // StructType = "struct" "{" { FieldDecl ";" } "}" .
642 // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] .
643 // AnonymousField = [ "*" ] TypeName .
644 // Tag = string_lit .
647 if (!isType (token
, TOKEN_OPEN_CURLY
))
651 while (!isType (token
, TOKEN_EOF
) && !isType (token
, TOKEN_CLOSE_CURLY
))
653 tokenInfo
*memberCandidate
= NULL
;
654 boolean first
= TRUE
;
656 while (!isType (token
, TOKEN_EOF
))
658 if (isType (token
, TOKEN_IDENTIFIER
))
662 // could be anonymous field like in 'struct {int}' - we don't know yet
663 memberCandidate
= copyToken (token
);
670 // if we are here, there was a comma and memberCandidate isn't an anonymous field
671 makeTag (memberCandidate
, GOTAG_MEMBER
, parent_token
, GOTAG_STRUCT
, NULL
, NULL
);
672 deleteToken (memberCandidate
);
673 memberCandidate
= NULL
;
675 makeTag (token
, GOTAG_MEMBER
, parent_token
, GOTAG_STRUCT
, NULL
, NULL
);
679 if (!isType (token
, TOKEN_COMMA
))
684 // in the case of an anonymous field, we already read part of the
685 // type into memberCandidate and skipType() should return FALSE so no tag should
686 // be generated in this case.
687 if (skipType (token
) && memberCandidate
)
688 makeTag (memberCandidate
, GOTAG_MEMBER
, parent_token
, GOTAG_STRUCT
, NULL
, NULL
);
691 deleteToken (memberCandidate
);
693 while (!isType (token
, TOKEN_SEMICOLON
) && !isType (token
, TOKEN_CLOSE_CURLY
)
694 && !isType (token
, TOKEN_EOF
))
697 skipToMatched (token
);
700 if (!isType (token
, TOKEN_CLOSE_CURLY
))
702 // we are at TOKEN_SEMICOLON
708 static void parseConstTypeVar (tokenInfo
*const token
, goKind kind
)
710 // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
711 // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] .
712 // IdentifierList = identifier { "," identifier } .
713 // ExpressionList = Expression { "," Expression } .
714 // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
715 // TypeSpec = identifier Type .
716 // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
717 // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
718 boolean usesParens
= FALSE
;
722 if (isType (token
, TOKEN_OPEN_PAREN
))
730 tokenInfo
*typeToken
= NULL
;
732 while (!isType (token
, TOKEN_EOF
))
734 if (isType (token
, TOKEN_IDENTIFIER
))
736 if (kind
== GOTAG_TYPE
)
738 typeToken
= copyToken (token
);
740 if (isKeyword (token
, KEYWORD_struct
))
741 makeTag (typeToken
, GOTAG_STRUCT
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
742 else if (isKeyword (token
, KEYWORD_interface
))
743 makeTag (typeToken
, GOTAG_INTERFACE
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
745 makeTag (typeToken
, kind
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
749 makeTag (token
, kind
, NULL
, GOTAG_UNDEFINED
, NULL
, NULL
);
752 if (!isType (token
, TOKEN_COMMA
))
759 if (isKeyword (token
, KEYWORD_struct
))
760 parseStructMembers (token
, typeToken
);
763 deleteToken (typeToken
);
768 while (!isType (token
, TOKEN_SEMICOLON
) && !isType (token
, TOKEN_CLOSE_PAREN
)
769 && !isType (token
, TOKEN_EOF
))
772 skipToMatched (token
);
775 if (usesParens
&& !isType (token
, TOKEN_CLOSE_PAREN
))
777 // we are at TOKEN_SEMICOLON
781 while (!isType (token
, TOKEN_EOF
) &&
782 usesParens
&& !isType (token
, TOKEN_CLOSE_PAREN
));
785 static void parseGoFile (tokenInfo
*const token
)
791 if (isType (token
, TOKEN_KEYWORD
))
793 switch (token
->keyword
)
795 case KEYWORD_package
:
796 parsePackage (token
);
799 parseFunctionOrMethod (token
);
802 parseConstTypeVar (token
, GOTAG_CONST
);
805 parseConstTypeVar (token
, GOTAG_TYPE
);
808 parseConstTypeVar (token
, GOTAG_VAR
);
814 else if (isType (token
, TOKEN_OPEN_PAREN
) || isType (token
, TOKEN_OPEN_CURLY
) ||
815 isType (token
, TOKEN_OPEN_SQUARE
))
817 skipToMatched (token
);
819 } while (token
->type
!= TOKEN_EOF
);
822 static void findGoTags (void)
824 tokenInfo
*const token
= newToken ();
829 vStringDelete (scope
);
833 extern parserDefinition
*GoParser (void)
835 static const char *const extensions
[] = { "go", NULL
};
836 parserDefinition
*def
= parserNew ("Go");
837 def
->kinds
= GoKinds
;
838 def
->kindCount
= KIND_COUNT (GoKinds
);
839 def
->extensions
= extensions
;
840 def
->parser
= findGoTags
;
841 def
->initialize
= initialize
;