ctags/parsers/rust.c

   1 /*
   2 *
   3 *   This source code is released for free distribution under the terms of the
   4 *   GNU General Public License version 2 or (at your option) any later version.
   5 *
   6 *   This module contains functions for generating tags for Rust files.
   7 */
   8
   9 /*
  10 *   INCLUDE FILES
  11 */
  12 #include "general.h"    /* must always come first */
  13
  14 #include <string.h>
  15
  16 #include "keyword.h"
  17 #include "parse.h"
  18 #include "entry.h"
  19 #include "options.h"
  20 #include "read.h"
  21 #include "routines.h"
  22 #include "vstring.h"
  23
  24 /*
  25 *   MACROS
  26 */
/* Upper bound on the number of characters advanceAndStoreChar() keeps in
 * lexerState::token_str; characters beyond it are read but not stored. */
#define MAX_STRING_LENGTH 256
  28
  29 /*
  30 *   DATA DECLARATIONS
  31 */
  32
  33 typedef enum {
  34         K_MOD,
  35         K_STRUCT,
  36         K_TRAIT,
  37         K_IMPL,
  38         K_FN,
  39         K_ENUM,
  40         K_TYPE,
  41         K_STATIC,
  42         K_MACRO,
  43         K_FIELD,
  44         K_VARIANT,
  45         K_METHOD,
  46         K_NONE
  47 } RustKind;
  48
  49 static kindDefinition rustKinds[] = {
  50         {true, 'n', "module", "module"},
  51         {true, 's', "struct", "structural type"},
  52         {true, 'i', "interface", "trait interface"},
  53         {true, 'c', "implementation", "implementation"},
  54         {true, 'f', "function", "Function"},
  55         {true, 'g', "enum", "Enum"},
  56         {true, 't', "typedef", "Type Alias"},
  57         {true, 'v', "variable", "Global variable"},
  58         {true, 'M', "macro", "Macro Definition"},
  59         {true, 'm', "field", "A struct field"},
  60         {true, 'e', "enumerator", "An enum variant"},
  61         {true, 'P', "method", "A method"},
  62 };
  63
  64 typedef enum {
  65         TOKEN_WHITESPACE,
  66         TOKEN_STRING,
  67         TOKEN_IDENT,
  68         TOKEN_LSHIFT,
  69         TOKEN_RSHIFT,
  70         TOKEN_RARROW,
  71         TOKEN_EOF
  72 } tokenType;
  73
  74 typedef struct {
  75         /* Characters */
  76         int cur_c;
  77         int next_c;
  78
  79         /* Tokens */
  80         int cur_token;
  81         vString* token_str;
  82         unsigned long line;
  83         MIOPos pos;
  84 } lexerState;
  85
  86 /*
  87 *   FUNCTION PROTOTYPES
  88 */
  89
/* Forward declaration: the item parsers below call parseBlock() for the body
 * of the item they have just tagged, passing the item's kind and the scope
 * string. NOTE(review): the definition is further down the file; presumably
 * `delim` says whether the block is enclosed in braces -- confirm there. */
static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope);
  91
  92 /*
  93 *   FUNCTION DEFINITIONS
  94 */
  95
  96 /* Resets the scope string to the old length */
  97 static void resetScope (vString *scope, size_t old_len)
  98 {
  99         vStringTruncate (scope, old_len);
 100 }
 101
 102 /* Adds a name to the end of the scope string */
 103 static void addToScope (vString *scope, vString *name)
 104 {
 105         if (vStringLength(scope) > 0)
 106                 vStringCatS(scope, "::");
 107         vStringCat(scope, name);
 108 }
 109
 110 /* Write the lexer's current token to string, taking care of special tokens */
 111 static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
 112 {
 113         switch (lexer->cur_token)
 114         {
 115                 case TOKEN_IDENT:
 116                         vStringCat(out_str, lexer->token_str);
 117                         break;
 118                 case TOKEN_STRING:
 119                         vStringCat(out_str, lexer->token_str);
 120                         break;
 121                 case TOKEN_WHITESPACE:
 122                         vStringPut(out_str, ' ');
 123                         break;
 124                 case TOKEN_LSHIFT:
 125                         vStringCatS(out_str, "<<");
 126                         break;
 127                 case TOKEN_RSHIFT:
 128                         vStringCatS(out_str, ">>");
 129                         break;
 130                 case TOKEN_RARROW:
 131                         vStringCatS(out_str, "->");
 132                         break;
 133                 default:
 134                         vStringPut(out_str, (char) lexer->cur_token);
 135         }
 136 }
 137
 138 /* Reads a character from the file */
 139 static void advanceChar (lexerState *lexer)
 140 {
 141         lexer->cur_c = lexer->next_c;
 142         lexer->next_c = getcFromInputFile();
 143 }
 144
 145 /* Reads N characters from the file */
 146 static void advanceNChar (lexerState *lexer, int n)
 147 {
 148         while (n--)
 149                 advanceChar(lexer);
 150 }
 151
 152 /* Store the current character in lexerState::token_str if there is space
 153  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
 154 static void advanceAndStoreChar (lexerState *lexer)
 155 {
 156         if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
 157                 vStringPut(lexer->token_str, (char) lexer->cur_c);
 158         advanceChar(lexer);
 159 }
 160
 161 static bool isWhitespace (int c)
 162 {
 163         return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 164 }
 165
 166 static bool isAscii (int c)
 167 {
 168         return (c >= 0) && (c < 0x80);
 169 }
 170
 171 /* This isn't quite right for Unicode identifiers */
 172 static bool isIdentifierStart (int c)
 173 {
 174         return (isAscii(c) && (isalpha(c) || c == '_')) || !isAscii(c);
 175 }
 176
 177 /* This isn't quite right for Unicode identifiers */
 178 static bool isIdentifierContinue (int c)
 179 {
 180         return (isAscii(c) && (isalnum(c) || c == '_')) || !isAscii(c);
 181 }
 182
 183 static void scanWhitespace (lexerState *lexer)
 184 {
 185         while (isWhitespace(lexer->cur_c))
 186                 advanceChar(lexer);
 187 }
 188
 189 /* Normal line comments start with two /'s and continue until the next \n
 190  * (potentially after a \r). Additionally, a shebang in the beginning of the
 191  * file also counts as a line comment as long as it is not this sequence: #![ .
 192  * Block comments start with / followed by a * and end with a * followed by a /.
 193  * Unlike in C/C++ they nest. */
 194 static void scanComments (lexerState *lexer)
 195 {
 196         /* // */
 197         if (lexer->next_c == '/')
 198         {
 199                 advanceNChar(lexer, 2);
 200                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 201                         advanceChar(lexer);
 202         }
 203         /* #! */
 204         else if (lexer->next_c == '!')
 205         {
 206                 advanceNChar(lexer, 2);
 207                 /* If it is exactly #![ then it is not a comment, but an attribute */
 208                 if (lexer->cur_c == '[')
 209                         return;
 210                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 211                         advanceChar(lexer);
 212         }
 213         /* block comment */
 214         else if (lexer->next_c == '*')
 215         {
 216                 int level = 1;
 217                 advanceNChar(lexer, 2);
 218                 while (lexer->cur_c != EOF && level > 0)
 219                 {
 220                         if (lexer->cur_c == '*' && lexer->next_c == '/')
 221                         {
 222                                 level--;
 223                                 advanceNChar(lexer, 2);
 224                         }
 225                         else if (lexer->cur_c == '/' && lexer->next_c == '*')
 226                         {
 227                                 level++;
 228                                 advanceNChar(lexer, 2);
 229                         }
 230                         else
 231                         {
 232                                 advanceChar(lexer);
 233                         }
 234                 }
 235         }
 236 }
 237
 238 static void scanIdentifier (lexerState *lexer)
 239 {
 240         vStringClear(lexer->token_str);
 241         do
 242         {
 243                 advanceAndStoreChar(lexer);
 244         } while(lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c));
 245 }
 246
 247 /* Double-quoted strings, we only care about the \" escape. These
 248  * last past the end of the line, so be careful not too store too much
 249  * of them (see MAX_STRING_LENGTH). The only place we look at their
 250  * contents is in the function definitions, and there the valid strings are
 251  * things like "C" and "Rust" */
 252 static void scanString (lexerState *lexer)
 253 {
 254         vStringClear(lexer->token_str);
 255         advanceAndStoreChar(lexer);
 256         while (lexer->cur_c != EOF && lexer->cur_c != '"')
 257         {
 258                 if (lexer->cur_c == '\\' && lexer->next_c == '"')
 259                         advanceAndStoreChar(lexer);
 260                 advanceAndStoreChar(lexer);
 261         }
 262         advanceAndStoreChar(lexer);
 263 }
 264
 265 /* Raw strings look like this: r"" or r##""## where the number of
 266  * hashes must match */
 267 static void scanRawString (lexerState *lexer)
 268 {
 269         size_t num_initial_hashes = 0;
 270         vStringClear(lexer->token_str);
 271         advanceAndStoreChar(lexer);
 272         /* Count how many leading hashes there are */
 273         while (lexer->cur_c == '#')
 274         {
 275                 num_initial_hashes++;
 276                 advanceAndStoreChar(lexer);
 277         }
 278         if (lexer->cur_c != '"')
 279                 return;
 280         advanceAndStoreChar(lexer);
 281         while (lexer->cur_c != EOF)
 282         {
 283                 /* Count how many trailing hashes there are. If the number is equal or more
 284                  * than the number of leading hashes, break. */
 285                 if (lexer->cur_c == '"')
 286                 {
 287                         size_t num_trailing_hashes = 0;
 288                         advanceAndStoreChar(lexer);
 289                         while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
 290                         {
 291                                 num_trailing_hashes++;
 292
 293                                 advanceAndStoreChar(lexer);
 294                         }
 295                         if (num_trailing_hashes == num_initial_hashes)
 296                                 break;
 297                 }
 298                 else
 299                 {
 300                         advanceAndStoreChar(lexer);
 301                 }
 302         }
 303 }
 304
 305 /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
 306  * 'lifetime. We'll use this approximate regexp for the literals:
 307  * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
 308  * token as a string, so it gets preserved as is for function signatures with
 309  * lifetimes. */
 310 static void scanCharacterOrLifetime (lexerState *lexer)
 311 {
 312         vStringClear(lexer->token_str);
 313         advanceAndStoreChar(lexer);
 314
 315         if (lexer->cur_c == '\\')
 316         {
 317                 advanceAndStoreChar(lexer);
 318                 /* The \' \\ \' \' (literally '\'') case */
 319                 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
 320                 {
 321                         advanceAndStoreChar(lexer);
 322                         advanceAndStoreChar(lexer);
 323                 }
 324                 /* The \' \\ [^']+ \' case */
 325                 else
 326                 {
 327                         while (lexer->cur_c != EOF && lexer->cur_c != '\'')
 328                                 advanceAndStoreChar(lexer);
 329                 }
 330         }
 331         /* The \' [^'] \' case */
 332         else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
 333         {
 334                 advanceAndStoreChar(lexer);
 335                 advanceAndStoreChar(lexer);
 336         }
 337         /* Otherwise it is malformed, or a lifetime */
 338 }
 339
/* Advances the parser one token, optionally skipping whitespace
 * (otherwise it is concatenated and returned as a single whitespace token).
 * Whitespace is needed to properly render function signatures. Unrecognized
 * token starts are stored literally, e.g. token may equal to a character '#'.
 *
 * lexer:          lexer state; cur_token, line and pos are updated, and
 *                 token_str is refilled for identifier and string tokens.
 * skip_whitspace: when true, whitespace and comments are dropped silently;
 *                 when false, a run of them is reported as one
 *                 TOKEN_WHITESPACE.
 *
 * Returns the new token, which is also stored in lexer->cur_token: a
 * tokenType value or the character code of a single-character token. */
static int advanceToken (lexerState *lexer, bool skip_whitspace)
{
        bool have_whitespace = false;
        lexer->line = getInputLineNumber();
        lexer->pos = getInputFilePosition();
        /* Phase 1: swallow whitespace and comments. Comments count as
         * whitespace for the purpose of TOKEN_WHITESPACE. */
        while (lexer->cur_c != EOF)
        {
                if (isWhitespace(lexer->cur_c))
                {
                        scanWhitespace(lexer);
                        have_whitespace = true;
                }
                else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
                {
                        scanComments(lexer);
                        have_whitespace = true;
                }
                else
                {
                        /* First significant character: report the skipped
                         * run if the caller asked for it, else go on to lex
                         * the real token. */
                        if (have_whitespace && !skip_whitspace)
                                return lexer->cur_token = TOKEN_WHITESPACE;
                        break;
                }
        }
        /* Record line/position again now that leading whitespace is gone. */
        lexer->line = getInputLineNumber();
        lexer->pos = getInputFilePosition();
        /* Phase 2: lex one token. Every branch returns, so the loop body
         * runs at most once; the loop only supplies the EOF check. */
        while (lexer->cur_c != EOF)
        {
                if (lexer->cur_c == '"')
                {
                        scanString(lexer);
                        return lexer->cur_token = TOKEN_STRING;
                }
                /* Must be tested before the identifier case, since 'r' is
                 * also a valid identifier start. */
                else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
                {
                        scanRawString(lexer);
                        return lexer->cur_token = TOKEN_STRING;
                }
                else if (lexer->cur_c == '\'')
                {
                        scanCharacterOrLifetime(lexer);
                        return lexer->cur_token = TOKEN_STRING;
                }
                else if (isIdentifierStart(lexer->cur_c))
                {
                        scanIdentifier(lexer);
                        return lexer->cur_token = TOKEN_IDENT;
                }
                /* These shift tokens aren't too important for tag-generation per se,
                 * but they confuse the skipUntil code which tracks the <> pairs. */
                else if (lexer->cur_c == '>' && lexer->next_c == '>')
                {
                        advanceNChar(lexer, 2);
                        return lexer->cur_token = TOKEN_RSHIFT;
                }
                else if (lexer->cur_c == '<' && lexer->next_c == '<')
                {
                        advanceNChar(lexer, 2);
                        return lexer->cur_token = TOKEN_LSHIFT;
                }
                else if (lexer->cur_c == '-' && lexer->next_c == '>')
                {
                        advanceNChar(lexer, 2);
                        return lexer->cur_token = TOKEN_RARROW;
                }
                else
                {
                        /* Anything else is a one-character token whose value
                         * is the character itself. */
                        int c = lexer->cur_c;
                        advanceChar(lexer);
                        return lexer->cur_token = c;
                }
        }
        return lexer->cur_token = TOKEN_EOF;
}
 418
 419 static void initLexer (lexerState *lexer)
 420 {
 421         advanceNChar(lexer, 2);
 422         lexer->token_str = vStringNew();
 423
 424         if (lexer->cur_c == '#' && lexer->next_c == '!')
 425                 scanComments(lexer);
 426         advanceToken(lexer, true);
 427 }
 428
 429 static void deInitLexer (lexerState *lexer)
 430 {
 431         vStringDelete(lexer->token_str);
 432         lexer->token_str = NULL;
 433 }
 434
 435 static void addTag (vString* ident, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
 436 {
 437         if (kind == K_NONE || ! rustKinds[kind].enabled)
 438                 return;
 439         tagEntryInfo tag;
 440         initTagEntry(&tag, vStringValue(ident), kind);
 441
 442         tag.lineNumber = line;
 443         tag.filePosition = pos;
 444
 445         tag.extensionFields.signature = arg_list;
 446         /*tag.extensionFields.varType = type;*/ /* FIXME: map to typeRef[1]? */
 447         if (parent_kind != K_NONE)
 448         {
 449                 tag.extensionFields.scopeKindIndex = parent_kind;
 450                 tag.extensionFields.scopeName = vStringValue(scope);
 451         }
 452         makeTagEntry(&tag);
 453 }
 454
/* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
 * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings
 *
 * lexer:           lexer state; scanning starts at the CURRENT token
 * goal_tokens:     tokens to stop at; may be NULL when num_goal_tokens is 0
 * num_goal_tokens: number of entries in goal_tokens
 *
 * On return the current token is the goal token that was found, or (with no
 * goal tokens) the token that brought every nesting level back to zero, or
 * TOKEN_EOF. The stopping token itself is not consumed.
 *
 * NOTE: the counters are never clamped. An unmatched closing token drives
 * its counter negative, and goal tokens are then ignored until the levels
 * are all zero again or the input ends. */
static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
{
        int angle_level = 0;
        int paren_level = 0;
        int brace_level = 0;
        int bracket_level = 0;
        while (lexer->cur_token != TOKEN_EOF)
        {
                /* Goal tokens only count outside of any bracket pairing. */
                if (angle_level == 0 && paren_level == 0 && brace_level == 0
                    && bracket_level == 0)
                {
                        int ii = 0;
                        for(ii = 0; ii < num_goal_tokens; ii++)
                        {
                                if (lexer->cur_token == goal_tokens[ii])
                                {
                                        break;
                                }
                        }
                        /* ii stopped short of the end: a goal matched. */
                        if (ii < num_goal_tokens)
                                break;
                }
                switch (lexer->cur_token)
                {
                        case '<':
                                angle_level++;
                                break;
                        case '(':
                                paren_level++;
                                break;
                        case '{':
                                brace_level++;
                                break;
                        case '[':
                                bracket_level++;
                                break;
                        case '>':
                                angle_level--;
                                break;
                        case ')':
                                paren_level--;
                                break;
                        case '}':
                                brace_level--;
                                break;
                        case ']':
                                bracket_level--;
                                break;
                        /* ">>" closes two angle brackets, but only if at
                         * least two are open; otherwise it is ignored. */
                        case TOKEN_RSHIFT:
                                if (angle_level >= 2)
                                        angle_level -= 2;
                                break;
                        /* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
                        default:
                                break;
                }
                /* Has to be after the token switch to catch the case when we start with the initial level token */
                if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
                    && bracket_level == 0)
                        break;
                advanceToken(lexer, true);
        }
}
 520
/* Function format:
 * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"
 *
 * Called with the current token on "fn". Emits a K_METHOD tag when the
 * parent is a trait or impl and a K_FN tag otherwise, using everything
 * between the name and the opening '{' (or terminating ';') as the
 * signature. If the signature looks valid, the name is appended to scope and
 * the body is handed to parseBlock(); the scope is not restored here.
 *
 * lexer:       lexer state
 * scope:       scope path of the enclosing item; extended on success
 * parent_kind: RustKind of the enclosing item, or K_NONE */
static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
{
        int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
        vString *name;
        vString *arg_list;
        unsigned long line;
        MIOPos pos;
        int paren_level = 0;
        int bracket_level = 0;
        bool found_paren = false;
        bool valid_signature = true;

        advanceToken(lexer, true);
        /* Nothing has been allocated yet, so a plain return is safe. */
        if (lexer->cur_token != TOKEN_IDENT)
                return;

        name = vStringNewCopy(lexer->token_str);
        arg_list = vStringNew();

        /* The tag points at the function name, not at the "fn" keyword. */
        line = lexer->line;
        pos = lexer->pos;

        advanceToken(lexer, true);

        /* HACK: This is a bit coarse as far as what tag entry means by
         * 'arglist'... */
        while (lexer->cur_token != '{')
        {
                /* A ';' ends a body-less declaration, except inside
                 * brackets, where it belongs to a type such as [u8; 4]. */
                if (lexer->cur_token == ';' && bracket_level == 0)
                {
                        break;
                }
                else if (lexer->cur_token == '}')
                {
                        valid_signature = false;
                        break;
                }
                else if (lexer->cur_token == '(')
                {
                        found_paren = true;
                        paren_level++;
                }
                else if (lexer->cur_token == ')')
                {
                        paren_level--;
                        if (paren_level < 0)
                        {
                                valid_signature = false;
                                break;
                        }
                }
                else if (lexer->cur_token == '[')
                {
                        bracket_level++;
                }
                else if (lexer->cur_token == ']')
                {
                        bracket_level--;
                }
                else if (lexer->cur_token == TOKEN_EOF)
                {
                        valid_signature = false;
                        break;
                }
                writeCurTokenToStr(lexer, arg_list);
                /* Whitespace is kept (as single spaces) so the signature
                 * stays readable. */
                advanceToken(lexer, false);
        }
        /* A signature needs a parameter list and balanced () and []. */
        if (!found_paren || paren_level != 0 || bracket_level != 0)
                valid_signature = false;

        if (valid_signature)
        {
                vStringStripTrailing(arg_list);
                addTag(name, vStringValue(arg_list), kind, line, pos, scope, parent_kind);
                addToScope(scope, name);
                parseBlock(lexer, true, kind, scope);
        }

        vStringDelete(name);
        vStringDelete(arg_list);
}
 604
 605 /* Mod format:
 606  * "mod" <ident> "{" [<body>] "}"
 607  * "mod" <ident> ";"*/
 608 static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
 609 {
 610         advanceToken(lexer, true);
 611         if (lexer->cur_token != TOKEN_IDENT)
 612                 return;
 613
 614         addTag(lexer->token_str, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
 615         addToScope(scope, lexer->token_str);
 616
 617         advanceToken(lexer, true);
 618
 619         parseBlock(lexer, true, K_MOD, scope);
 620 }
 621
 622 /* Trait format:
 623  * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
 624  */
 625 static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
 626 {
 627         int goal_tokens[] = {'{'};
 628
 629         advanceToken(lexer, true);
 630         if (lexer->cur_token != TOKEN_IDENT)
 631                 return;
 632
 633         addTag(lexer->token_str, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
 634         addToScope(scope, lexer->token_str);
 635
 636         advanceToken(lexer, true);
 637
 638         skipUntil(lexer, goal_tokens, 1);
 639
 640         parseBlock(lexer, true, K_TRAIT, scope);
 641 }
 642
 643 /* Skips type blocks of the form <T:T<T>, ...> */
 644 static void skipTypeBlock (lexerState *lexer)
 645 {
 646         if (lexer->cur_token == '<')
 647         {
 648                 skipUntil(lexer, NULL, 0);
 649                 advanceToken(lexer, true);
 650         }
 651 }
 652
 653 /* Essentially grabs the last ident before 'for', '<' and '{', which
 654  * tends to correspond to what we want as the impl tag entry name */
 655 static void parseQualifiedType (lexerState *lexer, vString* name)
 656 {
 657         while (lexer->cur_token != TOKEN_EOF)
 658         {
 659                 if (lexer->cur_token == TOKEN_IDENT)
 660                 {
 661                         if (strcmp(vStringValue(lexer->token_str), "for") == 0
 662                                 || strcmp(vStringValue(lexer->token_str), "where") == 0)
 663                                 break;
 664                         vStringClear(name);
 665                         vStringCat(name, lexer->token_str);
 666                 }
 667                 else if (lexer->cur_token == '<' || lexer->cur_token == '{')
 668                 {
 669                         break;
 670                 }
 671                 advanceToken(lexer, true);
 672         }
 673         skipTypeBlock(lexer);
 674 }
 675
 676 /* Impl format:
 677  * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
 678  */
 679 static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
 680 {
 681         unsigned long line;
 682         MIOPos pos;
 683         vString *name;
 684
 685         advanceToken(lexer, true);
 686
 687         line = lexer->line;
 688         pos = lexer->pos;
 689
 690         skipTypeBlock(lexer);
 691
 692         name = vStringNew();
 693
 694         parseQualifiedType(lexer, name);
 695
 696         if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "for") == 0)
 697         {
 698                 advanceToken(lexer, true);
 699                 parseQualifiedType(lexer, name);
 700         }
 701
 702         addTag(name, NULL, K_IMPL, line, pos, scope, parent_kind);
 703         addToScope(scope, name);
 704
 705         parseBlock(lexer, true, K_IMPL, scope);
 706
 707         vStringDelete(name);
 708 }
 709
 710 /* Static format:
 711  * "static" ["mut"] <ident>
 712  */
 713 static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
 714 {
 715         advanceToken(lexer, true);
 716         if (lexer->cur_token != TOKEN_IDENT)
 717                 return;
 718         if (strcmp(vStringValue(lexer->token_str), "mut") == 0)
 719         {
 720                 advanceToken(lexer, true);
 721         }
 722         if (lexer->cur_token != TOKEN_IDENT)
 723                 return;
 724
 725         addTag(lexer->token_str, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
 726 }
 727
 728 /* Type format:
 729  * "type" <ident>
 730  */
 731 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
 732 {
 733         advanceToken(lexer, true);
 734         if (lexer->cur_token != TOKEN_IDENT)
 735                 return;
 736
 737         addTag(lexer->token_str, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
 738 }
 739
/* Structs and enums are very similar syntax-wise.
 * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
 * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
 * the enum's definition (e.g. for the type bounds)
 *
 * Struct/Enum format:
 * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
 * "struct/enum" <ident>[<type_bounds>] ";"
 *
 * Tags the struct/enum itself, appends its name to scope, and then tags each
 * field (K_FIELD) or variant (K_VARIANT) found in a brace-delimited body.
 * The scope is not restored here.
 *
 * lexer:       lexer state, current token on "struct" or "enum"
 * scope:       scope path of the enclosing item; extended with the name
 * parent_kind: RustKind of the enclosing item, or K_NONE
 * is_struct:   true for "struct", false for "enum"
 * */
static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, bool is_struct)
{
        int kind = is_struct ? K_STRUCT : K_ENUM;
        int field_kind = is_struct ? K_FIELD : K_VARIANT;
        int goal_tokens1[] = {';', '{'};

        advanceToken(lexer, true);
        if (lexer->cur_token != TOKEN_IDENT)
                return;

        addTag(lexer->token_str, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
        addToScope(scope, lexer->token_str);

        /* Skip type bounds (and a tuple-struct's parenthesized fields) up to
         * the body or the terminating ';'. */
        skipUntil(lexer, goal_tokens1, 2);

        if (lexer->cur_token == '{')
        {
                vString *field_name = vStringNew();
                while (lexer->cur_token != TOKEN_EOF)
                {
                        int goal_tokens2[] = {'}', ','};
                        /* Skip attributes. Format:
                         * #[..] or #![..]
                         * */
                        if (lexer->cur_token == '#')
                        {
                                advanceToken(lexer, true);
                                if (lexer->cur_token == '!')
                                        advanceToken(lexer, true);
                                if (lexer->cur_token == '[')
                                {
                                        /* It's an attribute, skip it. */
                                        skipUntil(lexer, NULL, 0);
                                }
                                else
                                {
                                        /* Something's up with this field, skip to the next one */
                                        skipUntil(lexer, goal_tokens2, 2);
                                        continue;
                                }
                        }
                        if (lexer->cur_token == TOKEN_IDENT)
                        {
                                if (strcmp(vStringValue(lexer->token_str), "priv") == 0
                                    || strcmp(vStringValue(lexer->token_str), "pub") == 0)
                                {
                                        advanceToken(lexer, true);

                                        /* Skip the visibility specifications.
                                         * https://doc.rust-lang.org/reference/visibility-and-privacy.html */
                                        if (lexer->cur_token == '(')
                                        {
                                                advanceToken(lexer, true);
                                                skipUntil (lexer, (int []){')'}, 1);
                                                advanceToken(lexer, true);
                                        }

                                        if (lexer->cur_token != TOKEN_IDENT)
                                        {
                                                /* Something's up with this field, skip to the next one */
                                                skipUntil(lexer, goal_tokens2, 2);
                                                continue;
                                        }
                                }

                                vStringClear(field_name);
                                vStringCat(field_name, lexer->token_str);
                                /* The struct/enum being parsed is the parent. */
                                addTag(field_name, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
                                /* Skip the field's type / variant payload. */
                                skipUntil(lexer, goal_tokens2, 2);
                        }
                        if (lexer->cur_token == '}')
                        {
                                /* End of the body: step past the brace. */
                                advanceToken(lexer, true);
                                break;
                        }
                        /* Step past the ',' (or any other token this
                         * iteration stopped on). */
                        advanceToken(lexer, true);
                }
                vStringDelete(field_name);
        }
}
 829
 830 /* Skip the body of the macro. Can't use skipUntil here as
 831  * the body of the macro may have arbitrary code which confuses it (e.g.
 832  * bitshift operators/function return arrows) */
 833 static void skipMacro (lexerState *lexer)
 834 {
 835         int level = 0;
 836         int plus_token = 0;
 837         int minus_token = 0;
 838
 839         advanceToken(lexer, true);
 840         switch (lexer->cur_token)
 841         {
 842                 case '(':
 843                         plus_token = '(';
 844                         minus_token = ')';
 845                         break;
 846                 case '{':
 847                         plus_token = '{';
 848                         minus_token = '}';
 849                         break;
 850                 case '[':
 851                         plus_token = '[';
 852                         minus_token = ']';
 853                         break;
 854                 default:
 855                         return;
 856         }
 857
 858         while (lexer->cur_token != TOKEN_EOF)
 859         {
 860                 if (lexer->cur_token == plus_token)
 861                         level++;
 862                 else if (lexer->cur_token == minus_token)
 863                         level--;
 864                 if (level == 0)
 865                         break;
 866                 advanceToken(lexer, true);
 867         }
 868         advanceToken(lexer, true);
 869 }
 870
 871 /*
 872  * Macro rules format:
 873  * "macro_rules" "!" <ident> <macro_body>
 874  */
 875 static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
 876 {
 877         advanceToken(lexer, true);
 878
 879         if (lexer->cur_token != '!')
 880                 return;
 881
 882         advanceToken(lexer, true);
 883
 884         if (lexer->cur_token != TOKEN_IDENT)
 885                 return;
 886
 887         addTag(lexer->token_str, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
 888
 889         skipMacro(lexer);
 890 }
 891
 892 /*
 893  * Rust is very liberal with nesting, so this function is used pretty much for any block
 894  */
 895 static void parseBlock (lexerState *lexer, bool delim, int kind, vString *scope)
 896 {
 897         int level = 1;
 898         if (delim)
 899         {
 900                 if (lexer->cur_token != '{')
 901                         return;
 902                 advanceToken(lexer, true);
 903         }
 904         while (lexer->cur_token != TOKEN_EOF)
 905         {
 906                 if (lexer->cur_token == TOKEN_IDENT)
 907                 {
 908                         size_t old_scope_len = vStringLength(scope);
 909                         if (strcmp(vStringValue(lexer->token_str), "fn") == 0)
 910                         {
 911                                 parseFn(lexer, scope, kind);
 912                         }
 913                         else if(strcmp(vStringValue(lexer->token_str), "mod") == 0)
 914                         {
 915                                 parseMod(lexer, scope, kind);
 916                         }
 917                         else if(strcmp(vStringValue(lexer->token_str), "static") == 0)
 918                         {
 919                                 parseStatic(lexer, scope, kind);
 920                         }
 921                         else if(strcmp(vStringValue(lexer->token_str), "trait") == 0)
 922                         {
 923                                 parseTrait(lexer, scope, kind);
 924                         }
 925                         else if(strcmp(vStringValue(lexer->token_str), "type") == 0)
 926                         {
 927                                 parseType(lexer, scope, kind);
 928                         }
 929                         else if(strcmp(vStringValue(lexer->token_str), "impl") == 0)
 930                         {
 931                                 parseImpl(lexer, scope, kind);
 932                         }
 933                         else if(strcmp(vStringValue(lexer->token_str), "struct") == 0)
 934                         {
 935                                 parseStructOrEnum(lexer, scope, kind, true);
 936                         }
 937                         else if(strcmp(vStringValue(lexer->token_str), "enum") == 0)
 938                         {
 939                                 parseStructOrEnum(lexer, scope, kind, false);
 940                         }
 941                         else if(strcmp(vStringValue(lexer->token_str), "macro_rules") == 0)
 942                         {
 943                                 parseMacroRules(lexer, scope, kind);
 944                         }
 945                         else
 946                         {
 947                                 advanceToken(lexer, true);
 948                                 if (lexer->cur_token == '!')
 949                                 {
 950                                         skipMacro(lexer);
 951                                 }
 952                         }
 953                         resetScope(scope, old_scope_len);
 954                 }
 955                 else if (lexer->cur_token == '{')
 956                 {
 957                         level++;
 958                         advanceToken(lexer, true);
 959                 }
 960                 else if (lexer->cur_token == '}')
 961                 {
 962                         level--;
 963                         advanceToken(lexer, true);
 964                 }
 965                 else if (lexer->cur_token == '\'')
 966                 {
 967                         /* Skip over the 'static lifetime, as it confuses the static parser above */
 968                         advanceToken(lexer, true);
 969                         if (lexer->cur_token == TOKEN_IDENT && strcmp(vStringValue(lexer->token_str), "static") == 0)
 970                                 advanceToken(lexer, true);
 971                 }
 972                 else
 973                 {
 974                         advanceToken(lexer, true);
 975                 }
 976                 if (delim && level <= 0)
 977                         break;
 978         }
 979 }
 980
 981 static void findRustTags (void)
 982 {
 983         lexerState lexer = {0};
 984         vString* scope = vStringNew();
 985         initLexer(&lexer);
 986
 987         parseBlock(&lexer, false, K_NONE, scope);
 988         vStringDelete(scope);
 989
 990         deInitLexer(&lexer);
 991 }
 992
 993 extern parserDefinition *RustParser (void)
 994 {
 995         static const char *const extensions[] = { "rs", NULL };
 996         parserDefinition *def = parserNew ("Rust");
 997         def->kindTable = rustKinds;
 998         def->kindCount = ARRAY_SIZE (rustKinds);
 999         def->extensions = extensions;
1000         def->parser = findRustTags;
1001
1002         return def;
1003 }