ctags/parsers/rust.c

   1 /*
   2 *
   3 *   This source code is released for free distribution under the terms of the
   4 *   GNU General Public License.
   5 *
   6 *   This module contains functions for generating tags for Rust files.
   7 */
   8
   9 /*
  10 *   INCLUDE FILES
  11 */
  12 #include "general.h"    /* must always come first */
  13 #include "main.h"
  14
  15 #include <string.h>
  16
  17 #include "keyword.h"
  18 #include "parse.h"
  19 #include "entry.h"
  20 #include "options.h"
  21 #include "read.h"
  22 #include "vstring.h"
  23
  24 /*
  25 *   MACROS
  26 */
  27 #define MAX_STRING_LENGTH 256
  28
  29 /*
  30 *   DATA DECLARATIONS
  31 */
  32
  33 typedef enum {
  34         K_MOD,
  35         K_STRUCT,
  36         K_TRAIT,
  37         K_IMPL,
  38         K_FN,
  39         K_ENUM,
  40         K_TYPE,
  41         K_STATIC,
  42         K_MACRO,
  43         K_FIELD,
  44         K_VARIANT,
  45         K_METHOD,
  46         K_NONE
  47 } RustKind;
  48
/* Kind table handed to the tag machinery. Entries are looked up by RustKind
 * value (see addTag), so entry N must describe the N-th RustKind enumerator;
 * K_NONE deliberately has no entry. */
static kindOption rustKinds[] = {
        {TRUE, 'n', "module", "module"},                /* K_MOD */
        {TRUE, 's', "struct", "structural type"},       /* K_STRUCT */
        {TRUE, 'i', "interface", "trait interface"},    /* K_TRAIT */
        {TRUE, 'c', "implementation", "implementation"},/* K_IMPL */
        {TRUE, 'f', "function", "Function"},            /* K_FN */
        {TRUE, 'g', "enum", "Enum"},                    /* K_ENUM */
        {TRUE, 't', "typedef", "Type Alias"},           /* K_TYPE */
        {TRUE, 'v', "variable", "Global variable"},     /* K_STATIC */
        {TRUE, 'M', "macro", "Macro Definition"},       /* K_MACRO */
        {TRUE, 'm', "field", "A struct field"},         /* K_FIELD */
        {TRUE, 'e', "enumerator", "An enum variant"},   /* K_VARIANT */
        {TRUE, 'F', "method", "A method"},              /* K_METHOD */
};
  63
  64 typedef enum {
  65         TOKEN_WHITESPACE,
  66         TOKEN_STRING,
  67         TOKEN_IDENT,
  68         TOKEN_LSHIFT,
  69         TOKEN_RSHIFT,
  70         TOKEN_RARROW,
  71         TOKEN_EOF
  72 } tokenType;
  73
/* Complete state of the hand-written lexer: a two-character window over the
 * input plus the most recently produced token. */
typedef struct {
        /* Characters: cur_c is the character being examined, next_c is the
         * one-character lookahead; both are refilled by advanceChar() */
        int cur_c;
        int next_c;

        /* Tokens: cur_token is a tokenType value or a raw character;
         * token_str holds the text of the last identifier/string token;
         * line and pos are sampled by advanceToken() for use in tags */
        int cur_token;
        vString* token_str;
        unsigned long line;
        MIOPos pos;
} lexerState;
  85
  86 /*
  87 *   FUNCTION PROTOTYPES
  88 */
  89
  90 static void parseBlock (lexerState *lexer, boolean delim, int kind, vString *scope);
  91
  92 /*
  93 *   FUNCTION DEFINITIONS
  94 */
  95
  96 /* Resets the scope string to the old length */
  97 static void resetScope (vString *scope, size_t old_len)
  98 {
  99         scope->length = old_len;
 100         scope->buffer[old_len] = '\0';
 101 }
 102
 103 /* Adds a name to the end of the scope string */
 104 static void addToScope (vString *scope, vString *name)
 105 {
 106         if (vStringLength(scope) > 0)
 107                 vStringCatS(scope, "::");
 108         vStringCat(scope, name);
 109 }
 110
 111 /* Write the lexer's current token to string, taking care of special tokens */
 112 static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
 113 {
 114         switch (lexer->cur_token)
 115         {
 116                 case TOKEN_IDENT:
 117                         vStringCat(out_str, lexer->token_str);
 118                         break;
 119                 case TOKEN_STRING:
 120                         vStringCat(out_str, lexer->token_str);
 121                         break;
 122                 case TOKEN_WHITESPACE:
 123                         vStringPut(out_str, ' ');
 124                         break;
 125                 case TOKEN_LSHIFT:
 126                         vStringCatS(out_str, "<<");
 127                         break;
 128                 case TOKEN_RSHIFT:
 129                         vStringCatS(out_str, ">>");
 130                         break;
 131                 case TOKEN_RARROW:
 132                         vStringCatS(out_str, "->");
 133                         break;
 134                 default:
 135                         vStringPut(out_str, (char) lexer->cur_token);
 136         }
 137 }
 138
 139 /* Reads a character from the file */
 140 static void advanceChar (lexerState *lexer)
 141 {
 142         lexer->cur_c = lexer->next_c;
 143         lexer->next_c = fileGetc();
 144 }
 145
 146 /* Reads N characters from the file */
 147 static void advanceNChar (lexerState *lexer, int n)
 148 {
 149         while (n--)
 150                 advanceChar(lexer);
 151 }
 152
 153 /* Store the current character in lexerState::token_str if there is space
 154  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
 155 static void advanceAndStoreChar (lexerState *lexer)
 156 {
 157         if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
 158                 vStringPut(lexer->token_str, (char) lexer->cur_c);
 159         advanceChar(lexer);
 160 }
 161
 162 static boolean isWhitespace (int c)
 163 {
 164         return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 165 }
 166
 167 static boolean isAscii (int c)
 168 {
 169         return (c >= 0) && (c < 0x80);
 170 }
 171
 172 /* This isn't quite right for Unicode identifiers */
 173 static boolean isIdentifierStart (int c)
 174 {
 175         return (isAscii(c) && (isalpha(c) || c == '_')) || !isAscii(c);
 176 }
 177
 178 /* This isn't quite right for Unicode identifiers */
 179 static boolean isIdentifierContinue (int c)
 180 {
 181         return (isAscii(c) && (isalnum(c) || c == '_')) || !isAscii(c);
 182 }
 183
 184 static void scanWhitespace (lexerState *lexer)
 185 {
 186         while (isWhitespace(lexer->cur_c))
 187                 advanceChar(lexer);
 188 }
 189
 190 /* Normal line comments start with two /'s and continue until the next \n
 191  * (potentially after a \r). Additionally, a shebang in the beginning of the
 192  * file also counts as a line comment as long as it is not this sequence: #![ .
 193  * Block comments start with / followed by a * and end with a * followed by a /.
 194  * Unlike in C/C++ they nest. */
 195 static void scanComments (lexerState *lexer)
 196 {
 197         /* // */
 198         if (lexer->next_c == '/')
 199         {
 200                 advanceNChar(lexer, 2);
 201                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 202                         advanceChar(lexer);
 203         }
 204         /* #! */
 205         else if (lexer->next_c == '!')
 206         {
 207                 advanceNChar(lexer, 2);
 208                 /* If it is exactly #![ then it is not a comment, but an attribute */
 209                 if (lexer->cur_c == '[')
 210                         return;
 211                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 212                         advanceChar(lexer);
 213         }
 214         /* block comment */
 215         else if (lexer->next_c == '*')
 216         {
 217                 int level = 1;
 218                 advanceNChar(lexer, 2);
 219                 while (lexer->cur_c != EOF && level > 0)
 220                 {
 221                         if (lexer->cur_c == '*' && lexer->next_c == '/')
 222                         {
 223                                 level--;
 224                                 advanceNChar(lexer, 2);
 225                         }
 226                         else if (lexer->cur_c == '/' && lexer->next_c == '*')
 227                         {
 228                                 level++;
 229                                 advanceNChar(lexer, 2);
 230                         }
 231                         else
 232                         {
 233                                 advanceChar(lexer);
 234                         }
 235                 }
 236         }
 237 }
 238
 239 static void scanIdentifier (lexerState *lexer)
 240 {
 241         vStringClear(lexer->token_str);
 242         do
 243         {
 244                 advanceAndStoreChar(lexer);
 245         } while(lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c));
 246 }
 247
 248 /* Double-quoted strings, we only care about the \" escape. These
 249  * last past the end of the line, so be careful not too store too much
 250  * of them (see MAX_STRING_LENGTH). The only place we look at their
 251  * contents is in the function definitions, and there the valid strings are
 252  * things like "C" and "Rust" */
 253 static void scanString (lexerState *lexer)
 254 {
 255         vStringClear(lexer->token_str);
 256         advanceAndStoreChar(lexer);
 257         while (lexer->cur_c != EOF && lexer->cur_c != '"')
 258         {
 259                 if (lexer->cur_c == '\\' && lexer->next_c == '"')
 260                         advanceAndStoreChar(lexer);
 261                 advanceAndStoreChar(lexer);
 262         }
 263         advanceAndStoreChar(lexer);
 264 }
 265
 266 /* Raw strings look like this: r"" or r##""## where the number of
 267  * hashes must match */
 268 static void scanRawString (lexerState *lexer)
 269 {
 270         size_t num_initial_hashes = 0;
 271         vStringClear(lexer->token_str);
 272         advanceAndStoreChar(lexer);
 273         /* Count how many leading hashes there are */
 274         while (lexer->cur_c == '#')
 275         {
 276                 num_initial_hashes++;
 277                 advanceAndStoreChar(lexer);
 278         }
 279         if (lexer->cur_c != '"')
 280                 return;
 281         advanceAndStoreChar(lexer);
 282         while (lexer->cur_c != EOF)
 283         {
 284                 /* Count how many trailing hashes there are. If the number is equal or more
 285                  * than the number of leading hashes, break. */
 286                 if (lexer->cur_c == '"')
 287                 {
 288                         size_t num_trailing_hashes = 0;
 289                         advanceAndStoreChar(lexer);
 290                         while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
 291                         {
 292                                 num_trailing_hashes++;
 293
 294                                 advanceAndStoreChar(lexer);
 295                         }
 296                         if (num_trailing_hashes == num_initial_hashes)
 297                                 break;
 298                 }
 299                 else
 300                 {
 301                         advanceAndStoreChar(lexer);
 302                 }
 303         }
 304 }
 305
 306 /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
 307  * 'lifetime. We'll use this approximate regexp for the literals:
 308  * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
 309  * token as a string, so it gets preserved as is for function signatures with
 310  * lifetimes. */
 311 static void scanCharacterOrLifetime (lexerState *lexer)
 312 {
 313         vStringClear(lexer->token_str);
 314         advanceAndStoreChar(lexer);
 315
 316         if (lexer->cur_c == '\\')
 317         {
 318                 advanceAndStoreChar(lexer);
 319                 /* The \' \\ \' \' (literally '\'') case */
 320                 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
 321                 {
 322                         advanceAndStoreChar(lexer);
 323                         advanceAndStoreChar(lexer);
 324                 }
 325                 /* The \' \\ [^']+ \' case */
 326                 else
 327                 {
 328                         while (lexer->cur_c != EOF && lexer->cur_c != '\'')
 329                                 advanceAndStoreChar(lexer);
 330                 }
 331         }
 332         /* The \' [^'] \' case */
 333         else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
 334         {
 335                 advanceAndStoreChar(lexer);
 336                 advanceAndStoreChar(lexer);
 337         }
 338         /* Otherwise it is malformed, or a lifetime */
 339 }
 340
 341 /* Advances the parser one token, optionally skipping whitespace
 342  * (otherwise it is concatenated and returned as a single whitespace token).
 343  * Whitespace is needed to properly render function signatures. Unrecognized
 344  * token starts are stored literally, e.g. token may equal to a character '#'. */
 345 static int advanceToken (lexerState *lexer, boolean skip_whitspace)
 346 {
 347         boolean have_whitespace = FALSE;
 348         lexer->line = getSourceLineNumber();
 349         lexer->pos = getInputFilePosition();
 350         while (lexer->cur_c != EOF)
 351         {
 352                 if (isWhitespace(lexer->cur_c))
 353                 {
 354                         scanWhitespace(lexer);
 355                         have_whitespace = TRUE;
 356                 }
 357                 else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
 358                 {
 359                         scanComments(lexer);
 360                         have_whitespace = TRUE;
 361                 }
 362                 else
 363                 {
 364                         if (have_whitespace && !skip_whitspace)
 365                                 return lexer->cur_token = TOKEN_WHITESPACE;
 366                         break;
 367                 }
 368         }
 369         lexer->line = getSourceLineNumber();
 370         lexer->pos = getInputFilePosition();
 371         while (lexer->cur_c != EOF)
 372         {
 373                 if (lexer->cur_c == '"')
 374                 {
 375                         scanString(lexer);
 376                         return lexer->cur_token = TOKEN_STRING;
 377                 }
 378                 else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
 379                 {
 380                         scanRawString(lexer);
 381                         return lexer->cur_token = TOKEN_STRING;
 382                 }
 383                 else if (lexer->cur_c == '\'')
 384                 {
 385                         scanCharacterOrLifetime(lexer);
 386                         return lexer->cur_token = TOKEN_STRING;
 387                 }
 388                 else if (isIdentifierStart(lexer->cur_c))
 389                 {
 390                         scanIdentifier(lexer);
 391                         return lexer->cur_token = TOKEN_IDENT;
 392                 }
 393                 /* These shift tokens aren't too important for tag-generation per se,
 394                  * but they confuse the skipUntil code which tracks the <> pairs. */
 395                 else if (lexer->cur_c == '>' && lexer->next_c == '>')
 396                 {
 397                         advanceNChar(lexer, 2);
 398                         return lexer->cur_token = TOKEN_RSHIFT;
 399                 }
 400                 else if (lexer->cur_c == '<' && lexer->next_c == '<')
 401                 {
 402                         advanceNChar(lexer, 2);
 403                         return lexer->cur_token = TOKEN_LSHIFT;
 404                 }
 405                 else if (lexer->cur_c == '-' && lexer->next_c == '>')
 406                 {
 407                         advanceNChar(lexer, 2);
 408                         return lexer->cur_token = TOKEN_RARROW;
 409                 }
 410                 else
 411                 {
 412                         int c = lexer->cur_c;
 413                         advanceChar(lexer);
 414                         return lexer->cur_token = c;
 415                 }
 416         }
 417         return lexer->cur_token = TOKEN_EOF;
 418 }
 419
 420 static void initLexer (lexerState *lexer)
 421 {
 422         advanceNChar(lexer, 2);
 423         lexer->token_str = vStringNew();
 424
 425         if (lexer->cur_c == '#' && lexer->next_c == '!')
 426                 scanComments(lexer);
 427         advanceToken(lexer, TRUE);
 428 }
 429
 430 static void deInitLexer (lexerState *lexer)
 431 {
 432         vStringDelete(lexer->token_str);
 433         lexer->token_str = NULL;
 434 }
 435
 436 static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
 437 {
 438         if (kind == K_NONE)
 439                 return;
 440         tagEntryInfo tag;
 441         initTagEntry(&tag, ident->buffer);
 442
 443         tag.lineNumber = line;
 444         tag.filePosition = pos;
 445         tag.sourceFileName = getSourceFileName();
 446
 447         tag.kindName = rustKinds[kind].name;
 448         tag.kind = rustKinds[kind].letter;
 449
 450         tag.extensionFields.signature = arg_list;
 451         tag.extensionFields.varType = type;
 452         if (parent_kind != K_NONE)
 453         {
 454                 tag.extensionFields.scope[0] = rustKinds[parent_kind].name;
 455                 tag.extensionFields.scope[1] = scope->buffer;
 456         }
 457         makeTagEntry(&tag);
 458 }
 459
 460 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
 461  * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
 462 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
 463 {
 464         int angle_level = 0;
 465         int paren_level = 0;
 466         int brace_level = 0;
 467         int bracket_level = 0;
 468         while (lexer->cur_token != TOKEN_EOF)
 469         {
 470                 if (angle_level == 0 && paren_level == 0 && brace_level == 0
 471                     && bracket_level == 0)
 472                 {
 473                         int ii = 0;
 474                         for(ii = 0; ii < num_goal_tokens; ii++)
 475                         {
 476                                 if (lexer->cur_token == goal_tokens[ii])
 477                                 {
 478                                         break;
 479                                 }
 480                         }
 481                         if (ii < num_goal_tokens)
 482                                 break;
 483                 }
 484                 switch (lexer->cur_token)
 485                 {
 486                         case '<':
 487                                 angle_level++;
 488                                 break;
 489                         case '(':
 490                                 paren_level++;
 491                                 break;
 492                         case '{':
 493                                 brace_level++;
 494                                 break;
 495                         case '[':
 496                                 bracket_level++;
 497                                 break;
 498                         case '>':
 499                                 angle_level--;
 500                                 break;
 501                         case ')':
 502                                 paren_level--;
 503                                 break;
 504                         case '}':
 505                                 brace_level--;
 506                                 break;
 507                         case ']':
 508                                 bracket_level--;
 509                                 break;
 510                         case TOKEN_RSHIFT:
 511                                 if (angle_level >= 2)
 512                                         angle_level -= 2;
 513                                 break;
 514                         /* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
 515                         default:
 516                                 break;
 517                 }
 518                 /* Has to be after the token switch to catch the case when we start with the initial level token */
 519                 if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
 520                     && bracket_level == 0)
 521                         break;
 522                 advanceToken(lexer, TRUE);
 523         }
 524 }
 525
 526 /* Function format:
 527  * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"*/
 528 static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
 529 {
 530         int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
 531         vString *name;
 532         vString *arg_list;
 533         unsigned long line;
 534         MIOPos pos;
 535         int paren_level = 0;
 536         boolean found_paren = FALSE;
 537         boolean valid_signature = TRUE;
 538
 539         advanceToken(lexer, TRUE);
 540         if (lexer->cur_token != TOKEN_IDENT)
 541                 return;
 542
 543         name = vStringNewCopy(lexer->token_str);
 544         arg_list = vStringNew();
 545
 546         line = lexer->line;
 547         pos = lexer->pos;
 548
 549         advanceToken(lexer, TRUE);
 550
 551         /* HACK: This is a bit coarse as far as what tag entry means by
 552          * 'arglist'... */
 553         while (lexer->cur_token != '{' && lexer->cur_token != ';')
 554         {
 555                 if (lexer->cur_token == '}')
 556                 {
 557                         valid_signature = FALSE;
 558                         break;
 559                 }
 560                 else if (lexer->cur_token == '(')
 561                 {
 562                         found_paren = TRUE;
 563                         paren_level++;
 564                 }
 565                 else if (lexer->cur_token == ')')
 566                 {
 567                         paren_level--;
 568                         if (paren_level < 0)
 569                         {
 570                                 valid_signature = FALSE;
 571                                 break;
 572                         }
 573                 }
 574                 else if (lexer->cur_token == TOKEN_EOF)
 575                 {
 576                         valid_signature = FALSE;
 577                         break;
 578                 }
 579                 writeCurTokenToStr(lexer, arg_list);
 580                 advanceToken(lexer, FALSE);
 581         }
 582         if (!found_paren || paren_level != 0)
 583                 valid_signature = FALSE;
 584
 585         if (valid_signature)
 586         {
 587                 vStringStripTrailing(arg_list);
 588                 addTag(name, NULL, arg_list->buffer, kind, line, pos, scope, parent_kind);
 589                 addToScope(scope, name);
 590                 parseBlock(lexer, TRUE, kind, scope);
 591         }
 592
 593         vStringDelete(name);
 594         vStringDelete(arg_list);
 595 }
 596
 597 /* Mod format:
 598  * "mod" <ident> "{" [<body>] "}"
 599  * "mod" <ident> ";"*/
 600 static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
 601 {
 602         advanceToken(lexer, TRUE);
 603         if (lexer->cur_token != TOKEN_IDENT)
 604                 return;
 605
 606         addTag(lexer->token_str, NULL, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
 607         addToScope(scope, lexer->token_str);
 608
 609         advanceToken(lexer, TRUE);
 610
 611         parseBlock(lexer, TRUE, K_MOD, scope);
 612 }
 613
 614 /* Trait format:
 615  * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
 616  */
 617 static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
 618 {
 619         int goal_tokens[] = {'{'};
 620
 621         advanceToken(lexer, TRUE);
 622         if (lexer->cur_token != TOKEN_IDENT)
 623                 return;
 624
 625         addTag(lexer->token_str, NULL, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
 626         addToScope(scope, lexer->token_str);
 627
 628         advanceToken(lexer, TRUE);
 629
 630         skipUntil(lexer, goal_tokens, 1);
 631
 632         parseBlock(lexer, TRUE, K_TRAIT, scope);
 633 }
 634
 635 /* Skips type blocks of the form <T:T<T>, ...> */
 636 static void skipTypeBlock (lexerState *lexer)
 637 {
 638         if (lexer->cur_token == '<')
 639         {
 640                 skipUntil(lexer, NULL, 0);
 641                 advanceToken(lexer, TRUE);
 642         }
 643 }
 644
 645 /* Essentially grabs the last ident before 'for', '<' and '{', which
 646  * tends to correspond to what we want as the impl tag entry name */
 647 static void parseQualifiedType (lexerState *lexer, vString* name)
 648 {
 649         while (lexer->cur_token != TOKEN_EOF)
 650         {
 651                 if (lexer->cur_token == TOKEN_IDENT)
 652                 {
 653                         if (strcmp(lexer->token_str->buffer, "for") == 0
 654                                 || strcmp(lexer->token_str->buffer, "where") == 0)
 655                                 break;
 656                         vStringClear(name);
 657                         vStringCat(name, lexer->token_str);
 658                 }
 659                 else if (lexer->cur_token == '<' || lexer->cur_token == '{')
 660                 {
 661                         break;
 662                 }
 663                 advanceToken(lexer, TRUE);
 664         }
 665         skipTypeBlock(lexer);
 666 }
 667
 668 /* Impl format:
 669  * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
 670  */
 671 static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
 672 {
 673         unsigned long line;
 674         MIOPos pos;
 675         vString *name;
 676
 677         advanceToken(lexer, TRUE);
 678
 679         line = lexer->line;
 680         pos = lexer->pos;
 681
 682         skipTypeBlock(lexer);
 683
 684         name = vStringNew();
 685
 686         parseQualifiedType(lexer, name);
 687
 688         if (lexer->cur_token == TOKEN_IDENT && strcmp(lexer->token_str->buffer, "for") == 0)
 689         {
 690                 advanceToken(lexer, TRUE);
 691                 parseQualifiedType(lexer, name);
 692         }
 693
 694         addTag(name, NULL, NULL, K_IMPL, line, pos, scope, parent_kind);
 695         addToScope(scope, name);
 696
 697         parseBlock(lexer, TRUE, K_IMPL, scope);
 698
 699         vStringDelete(name);
 700 }
 701
 702 /* Static format:
 703  * "static" ["mut"] <ident>
 704  */
 705 static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
 706 {
 707         advanceToken(lexer, TRUE);
 708         if (lexer->cur_token != TOKEN_IDENT)
 709                 return;
 710         if (strcmp(lexer->token_str->buffer, "mut") == 0)
 711         {
 712                 advanceToken(lexer, TRUE);
 713         }
 714         if (lexer->cur_token != TOKEN_IDENT)
 715                 return;
 716
 717         addTag(lexer->token_str, NULL, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
 718 }
 719
 720 /* Type format:
 721  * "type" <ident>
 722  */
 723 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
 724 {
 725         advanceToken(lexer, TRUE);
 726         if (lexer->cur_token != TOKEN_IDENT)
 727                 return;
 728
 729         addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
 730 }
 731
 732 /* Structs and enums are very similar syntax-wise.
 733  * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
 734  * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
 735  * the enum's definition (e.g. for the type bounds)
 736  *
 737  * Struct/Enum format:
 738  * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
 739  * "struct/enum" <ident>[<type_bounds>] ";"
 740  * */
 741 static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, boolean is_struct)
 742 {
 743         int kind = is_struct ? K_STRUCT : K_ENUM;
 744         int field_kind = is_struct ? K_FIELD : K_VARIANT;
 745         int goal_tokens1[] = {';', '{'};
 746
 747         advanceToken(lexer, TRUE);
 748         if (lexer->cur_token != TOKEN_IDENT)
 749                 return;
 750
 751         addTag(lexer->token_str, NULL, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
 752         addToScope(scope, lexer->token_str);
 753
 754         skipUntil(lexer, goal_tokens1, 2);
 755
 756         if (lexer->cur_token == '{')
 757         {
 758                 vString *field_name = vStringNew();
 759                 while (lexer->cur_token != TOKEN_EOF)
 760                 {
 761                         int goal_tokens2[] = {'}', ','};
 762                         /* Skip attributes. Format:
 763                          * #[..] or #![..]
 764                          * */
 765                         if (lexer->cur_token == '#')
 766                         {
 767                                 advanceToken(lexer, TRUE);
 768                                 if (lexer->cur_token == '!')
 769                                         advanceToken(lexer, TRUE);
 770                                 if (lexer->cur_token == '[')
 771                                 {
 772                                         /* It's an attribute, skip it. */
 773                                         skipUntil(lexer, NULL, 0);
 774                                 }
 775                                 else
 776                                 {
 777                                         /* Something's up with this field, skip to the next one */
 778                                         skipUntil(lexer, goal_tokens2, 2);
 779                                         continue;
 780                                 }
 781                         }
 782                         if (lexer->cur_token == TOKEN_IDENT)
 783                         {
 784                                 if (strcmp(lexer->token_str->buffer, "priv") == 0
 785                                     || strcmp(lexer->token_str->buffer, "pub") == 0)
 786                                 {
 787                                         advanceToken(lexer, TRUE);
 788                                         if (lexer->cur_token != TOKEN_IDENT)
 789                                         {
 790                                                 /* Something's up with this field, skip to the next one */
 791                                                 skipUntil(lexer, goal_tokens2, 2);
 792                                                 continue;
 793                                         }
 794                                 }
 795
 796                                 vStringClear(field_name);
 797                                 vStringCat(field_name, lexer->token_str);
 798                                 addTag(field_name, NULL, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
 799                                 skipUntil(lexer, goal_tokens2, 2);
 800                         }
 801                         if (lexer->cur_token == '}')
 802                         {
 803                                 advanceToken(lexer, TRUE);
 804                                 break;
 805                         }
 806                         advanceToken(lexer, TRUE);
 807                 }
 808                 vStringDelete(field_name);
 809         }
 810 }
 811
 812 /* Skip the body of the macro. Can't use skipUntil here as
 813  * the body of the macro may have arbitrary code which confuses it (e.g.
 814  * bitshift operators/function return arrows) */
 815 static void skipMacro (lexerState *lexer)
 816 {
 817         int level = 0;
 818         int plus_token = 0;
 819         int minus_token = 0;
 820
 821         advanceToken(lexer, TRUE);
 822         switch (lexer->cur_token)
 823         {
 824                 case '(':
 825                         plus_token = '(';
 826                         minus_token = ')';
 827                         break;
 828                 case '{':
 829                         plus_token = '{';
 830                         minus_token = '}';
 831                         break;
 832                 case '[':
 833                         plus_token = '[';
 834                         minus_token = ']';
 835                         break;
 836                 default:
 837                         return;
 838         }
 839
 840         while (lexer->cur_token != TOKEN_EOF)
 841         {
 842                 if (lexer->cur_token == plus_token)
 843                         level++;
 844                 else if (lexer->cur_token == minus_token)
 845                         level--;
 846                 if (level == 0)
 847                         break;
 848                 advanceToken(lexer, TRUE);
 849         }
 850         advanceToken(lexer, TRUE);
 851 }
 852
 853 /*
 854  * Macro rules format:
 855  * "macro_rules" "!" <ident> <macro_body>
 856  */
 857 static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
 858 {
 859         advanceToken(lexer, TRUE);
 860
 861         if (lexer->cur_token != '!')
 862                 return;
 863
 864         advanceToken(lexer, TRUE);
 865
 866         if (lexer->cur_token != TOKEN_IDENT)
 867                 return;
 868
 869         addTag(lexer->token_str, NULL, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
 870
 871         skipMacro(lexer);
 872 }
 873
 874 /*
 875  * Rust is very liberal with nesting, so this function is used pretty much for any block
 876  */
 877 static void parseBlock (lexerState *lexer, boolean delim, int kind, vString *scope)
 878 {
 879         int level = 1;
 880         if (delim)
 881         {
 882                 if (lexer->cur_token != '{')
 883                         return;
 884                 advanceToken(lexer, TRUE);
 885         }
 886         while (lexer->cur_token != TOKEN_EOF)
 887         {
 888                 if (lexer->cur_token == TOKEN_IDENT)
 889                 {
 890                         size_t old_scope_len = vStringLength(scope);
 891                         if (strcmp(lexer->token_str->buffer, "fn") == 0)
 892                         {
 893                                 parseFn(lexer, scope, kind);
 894                         }
 895                         else if(strcmp(lexer->token_str->buffer, "mod") == 0)
 896                         {
 897                                 parseMod(lexer, scope, kind);
 898                         }
 899                         else if(strcmp(lexer->token_str->buffer, "static") == 0)
 900                         {
 901                                 parseStatic(lexer, scope, kind);
 902                         }
 903                         else if(strcmp(lexer->token_str->buffer, "trait") == 0)
 904                         {
 905                                 parseTrait(lexer, scope, kind);
 906                         }
 907                         else if(strcmp(lexer->token_str->buffer, "type") == 0)
 908                         {
 909                                 parseType(lexer, scope, kind);
 910                         }
 911                         else if(strcmp(lexer->token_str->buffer, "impl") == 0)
 912                         {
 913                                 parseImpl(lexer, scope, kind);
 914                         }
 915                         else if(strcmp(lexer->token_str->buffer, "struct") == 0)
 916                         {
 917                                 parseStructOrEnum(lexer, scope, kind, TRUE);
 918                         }
 919                         else if(strcmp(lexer->token_str->buffer, "enum") == 0)
 920                         {
 921                                 parseStructOrEnum(lexer, scope, kind, FALSE);
 922                         }
 923                         else if(strcmp(lexer->token_str->buffer, "macro_rules") == 0)
 924                         {
 925                                 parseMacroRules(lexer, scope, kind);
 926                         }
 927                         else
 928                         {
 929                                 advanceToken(lexer, TRUE);
 930                                 if (lexer->cur_token == '!')
 931                                 {
 932                                         skipMacro(lexer);
 933                                 }
 934                         }
 935                         resetScope(scope, old_scope_len);
 936                 }
 937                 else if (lexer->cur_token == '{')
 938                 {
 939                         level++;
 940                         advanceToken(lexer, TRUE);
 941                 }
 942                 else if (lexer->cur_token == '}')
 943                 {
 944                         level--;
 945                         advanceToken(lexer, TRUE);
 946                 }
 947                 else if (lexer->cur_token == '\'')
 948                 {
 949                         /* Skip over the 'static lifetime, as it confuses the static parser above */
 950                         advanceToken(lexer, TRUE);
 951                         if (lexer->cur_token == TOKEN_IDENT && strcmp(lexer->token_str->buffer, "static") == 0)
 952                                 advanceToken(lexer, TRUE);
 953                 }
 954                 else
 955                 {
 956                         advanceToken(lexer, TRUE);
 957                 }
 958                 if (delim && level <= 0)
 959                         break;
 960         }
 961 }
 962
 963 static void findRustTags (void)
 964 {
 965         lexerState lexer;
 966         vString* scope = vStringNew();
 967         initLexer(&lexer);
 968
 969         parseBlock(&lexer, FALSE, K_NONE, scope);
 970         vStringDelete(scope);
 971
 972         deInitLexer(&lexer);
 973 }
 974
 975 extern parserDefinition *RustParser (void)
 976 {
 977         static const char *const extensions[] = { "rs", NULL };
 978         parserDefinition *def = parserNew ("Rust");
 979         def->kinds = rustKinds;
 980         def->kindCount = KIND_COUNT (rustKinds);
 981         def->extensions = extensions;
 982         def->parser = findRustTags;
 983
 984         return def;
 985 }