ctags/parsers/rust.c

   1 /*
   2 *
   3 *   This source code is released for free distribution under the terms of the
   4 *   GNU General Public License version 2 or (at your option) any later version.
   5 *
   6 *   This module contains functions for generating tags for Rust files.
   7 */
   8
   9 /*
  10 *   INCLUDE FILES
  11 */
  12 #include "general.h"    /* must always come first */
  13 #include "main.h"
  14
  15 #include <string.h>
  16
  17 #include "keyword.h"
  18 #include "parse.h"
  19 #include "entry.h"
  20 #include "options.h"
  21 #include "read.h"
  22 #include "routines.h"
  23 #include "vstring.h"
  24
  25 /*
  26 *   MACROS
  27 */
  28 #define MAX_STRING_LENGTH 256
  29
  30 /*
  31 *   DATA DECLARATIONS
  32 */
  33
  34 typedef enum {
  35         K_MOD,
  36         K_STRUCT,
  37         K_TRAIT,
  38         K_IMPL,
  39         K_FN,
  40         K_ENUM,
  41         K_TYPE,
  42         K_STATIC,
  43         K_MACRO,
  44         K_FIELD,
  45         K_VARIANT,
  46         K_METHOD,
  47         K_NONE
  48 } RustKind;
  49
  50 static kindOption rustKinds[] = {
  51         {TRUE, 'n', "module", "module"},
  52         {TRUE, 's', "struct", "structural type"},
  53         {TRUE, 'i', "interface", "trait interface"},
  54         {TRUE, 'c', "implementation", "implementation"},
  55         {TRUE, 'f', "function", "Function"},
  56         {TRUE, 'g', "enum", "Enum"},
  57         {TRUE, 't', "typedef", "Type Alias"},
  58         {TRUE, 'v', "variable", "Global variable"},
  59         {TRUE, 'M', "macro", "Macro Definition"},
  60         {TRUE, 'm', "field", "A struct field"},
  61         {TRUE, 'e', "enumerator", "An enum variant"},
  62         {TRUE, 'F', "method", "A method"},
  63 };
  64
  65 typedef enum {
  66         TOKEN_WHITESPACE,
  67         TOKEN_STRING,
  68         TOKEN_IDENT,
  69         TOKEN_LSHIFT,
  70         TOKEN_RSHIFT,
  71         TOKEN_RARROW,
  72         TOKEN_EOF
  73 } tokenType;
  74
  75 typedef struct {
  76         /* Characters */
  77         int cur_c;
  78         int next_c;
  79
  80         /* Tokens */
  81         int cur_token;
  82         vString* token_str;
  83         unsigned long line;
  84         MIOPos pos;
  85 } lexerState;
  86
  87 /*
  88 *   FUNCTION PROTOTYPES
  89 */
  90
  91 static void parseBlock (lexerState *lexer, boolean delim, int kind, vString *scope);
  92
  93 /*
  94 *   FUNCTION DEFINITIONS
  95 */
  96
  97 /* Resets the scope string to the old length */
  98 static void resetScope (vString *scope, size_t old_len)
  99 {
 100         scope->length = old_len;
 101         scope->buffer[old_len] = '\0';
 102 }
 103
 104 /* Adds a name to the end of the scope string */
 105 static void addToScope (vString *scope, vString *name)
 106 {
 107         if (vStringLength(scope) > 0)
 108                 vStringCatS(scope, "::");
 109         vStringCat(scope, name);
 110 }
 111
 112 /* Write the lexer's current token to string, taking care of special tokens */
 113 static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
 114 {
 115         switch (lexer->cur_token)
 116         {
 117                 case TOKEN_IDENT:
 118                         vStringCat(out_str, lexer->token_str);
 119                         break;
 120                 case TOKEN_STRING:
 121                         vStringCat(out_str, lexer->token_str);
 122                         break;
 123                 case TOKEN_WHITESPACE:
 124                         vStringPut(out_str, ' ');
 125                         break;
 126                 case TOKEN_LSHIFT:
 127                         vStringCatS(out_str, "<<");
 128                         break;
 129                 case TOKEN_RSHIFT:
 130                         vStringCatS(out_str, ">>");
 131                         break;
 132                 case TOKEN_RARROW:
 133                         vStringCatS(out_str, "->");
 134                         break;
 135                 default:
 136                         vStringPut(out_str, (char) lexer->cur_token);
 137         }
 138 }
 139
 140 /* Reads a character from the file */
 141 static void advanceChar (lexerState *lexer)
 142 {
 143         lexer->cur_c = lexer->next_c;
 144         lexer->next_c = fileGetc();
 145 }
 146
 147 /* Reads N characters from the file */
 148 static void advanceNChar (lexerState *lexer, int n)
 149 {
 150         while (n--)
 151                 advanceChar(lexer);
 152 }
 153
 154 /* Store the current character in lexerState::token_str if there is space
 155  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
 156 static void advanceAndStoreChar (lexerState *lexer)
 157 {
 158         if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
 159                 vStringPut(lexer->token_str, (char) lexer->cur_c);
 160         advanceChar(lexer);
 161 }
 162
 163 static boolean isWhitespace (int c)
 164 {
 165         return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 166 }
 167
 168 static boolean isAscii (int c)
 169 {
 170         return (c >= 0) && (c < 0x80);
 171 }
 172
 173 /* This isn't quite right for Unicode identifiers */
 174 static boolean isIdentifierStart (int c)
 175 {
 176         return (isAscii(c) && (isalpha(c) || c == '_')) || !isAscii(c);
 177 }
 178
 179 /* This isn't quite right for Unicode identifiers */
 180 static boolean isIdentifierContinue (int c)
 181 {
 182         return (isAscii(c) && (isalnum(c) || c == '_')) || !isAscii(c);
 183 }
 184
 185 static void scanWhitespace (lexerState *lexer)
 186 {
 187         while (isWhitespace(lexer->cur_c))
 188                 advanceChar(lexer);
 189 }
 190
 191 /* Normal line comments start with two /'s and continue until the next \n
 192  * (potentially after a \r). Additionally, a shebang in the beginning of the
 193  * file also counts as a line comment as long as it is not this sequence: #![ .
 194  * Block comments start with / followed by a * and end with a * followed by a /.
 195  * Unlike in C/C++ they nest. */
 196 static void scanComments (lexerState *lexer)
 197 {
 198         /* // */
 199         if (lexer->next_c == '/')
 200         {
 201                 advanceNChar(lexer, 2);
 202                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 203                         advanceChar(lexer);
 204         }
 205         /* #! */
 206         else if (lexer->next_c == '!')
 207         {
 208                 advanceNChar(lexer, 2);
 209                 /* If it is exactly #![ then it is not a comment, but an attribute */
 210                 if (lexer->cur_c == '[')
 211                         return;
 212                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 213                         advanceChar(lexer);
 214         }
 215         /* block comment */
 216         else if (lexer->next_c == '*')
 217         {
 218                 int level = 1;
 219                 advanceNChar(lexer, 2);
 220                 while (lexer->cur_c != EOF && level > 0)
 221                 {
 222                         if (lexer->cur_c == '*' && lexer->next_c == '/')
 223                         {
 224                                 level--;
 225                                 advanceNChar(lexer, 2);
 226                         }
 227                         else if (lexer->cur_c == '/' && lexer->next_c == '*')
 228                         {
 229                                 level++;
 230                                 advanceNChar(lexer, 2);
 231                         }
 232                         else
 233                         {
 234                                 advanceChar(lexer);
 235                         }
 236                 }
 237         }
 238 }
 239
 240 static void scanIdentifier (lexerState *lexer)
 241 {
 242         vStringClear(lexer->token_str);
 243         do
 244         {
 245                 advanceAndStoreChar(lexer);
 246         } while(lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c));
 247 }
 248
 249 /* Double-quoted strings, we only care about the \" escape. These
 250  * last past the end of the line, so be careful not too store too much
 251  * of them (see MAX_STRING_LENGTH). The only place we look at their
 252  * contents is in the function definitions, and there the valid strings are
 253  * things like "C" and "Rust" */
 254 static void scanString (lexerState *lexer)
 255 {
 256         vStringClear(lexer->token_str);
 257         advanceAndStoreChar(lexer);
 258         while (lexer->cur_c != EOF && lexer->cur_c != '"')
 259         {
 260                 if (lexer->cur_c == '\\' && lexer->next_c == '"')
 261                         advanceAndStoreChar(lexer);
 262                 advanceAndStoreChar(lexer);
 263         }
 264         advanceAndStoreChar(lexer);
 265 }
 266
 267 /* Raw strings look like this: r"" or r##""## where the number of
 268  * hashes must match */
 269 static void scanRawString (lexerState *lexer)
 270 {
 271         size_t num_initial_hashes = 0;
 272         vStringClear(lexer->token_str);
 273         advanceAndStoreChar(lexer);
 274         /* Count how many leading hashes there are */
 275         while (lexer->cur_c == '#')
 276         {
 277                 num_initial_hashes++;
 278                 advanceAndStoreChar(lexer);
 279         }
 280         if (lexer->cur_c != '"')
 281                 return;
 282         advanceAndStoreChar(lexer);
 283         while (lexer->cur_c != EOF)
 284         {
 285                 /* Count how many trailing hashes there are. If the number is equal or more
 286                  * than the number of leading hashes, break. */
 287                 if (lexer->cur_c == '"')
 288                 {
 289                         size_t num_trailing_hashes = 0;
 290                         advanceAndStoreChar(lexer);
 291                         while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
 292                         {
 293                                 num_trailing_hashes++;
 294
 295                                 advanceAndStoreChar(lexer);
 296                         }
 297                         if (num_trailing_hashes == num_initial_hashes)
 298                                 break;
 299                 }
 300                 else
 301                 {
 302                         advanceAndStoreChar(lexer);
 303                 }
 304         }
 305 }
 306
 307 /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
 308  * 'lifetime. We'll use this approximate regexp for the literals:
 309  * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
 310  * token as a string, so it gets preserved as is for function signatures with
 311  * lifetimes. */
 312 static void scanCharacterOrLifetime (lexerState *lexer)
 313 {
 314         vStringClear(lexer->token_str);
 315         advanceAndStoreChar(lexer);
 316
 317         if (lexer->cur_c == '\\')
 318         {
 319                 advanceAndStoreChar(lexer);
 320                 /* The \' \\ \' \' (literally '\'') case */
 321                 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
 322                 {
 323                         advanceAndStoreChar(lexer);
 324                         advanceAndStoreChar(lexer);
 325                 }
 326                 /* The \' \\ [^']+ \' case */
 327                 else
 328                 {
 329                         while (lexer->cur_c != EOF && lexer->cur_c != '\'')
 330                                 advanceAndStoreChar(lexer);
 331                 }
 332         }
 333         /* The \' [^'] \' case */
 334         else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
 335         {
 336                 advanceAndStoreChar(lexer);
 337                 advanceAndStoreChar(lexer);
 338         }
 339         /* Otherwise it is malformed, or a lifetime */
 340 }
 341
 342 /* Advances the parser one token, optionally skipping whitespace
 343  * (otherwise it is concatenated and returned as a single whitespace token).
 344  * Whitespace is needed to properly render function signatures. Unrecognized
 345  * token starts are stored literally, e.g. token may equal to a character '#'. */
 346 static int advanceToken (lexerState *lexer, boolean skip_whitspace)
 347 {
 348         boolean have_whitespace = FALSE;
 349         lexer->line = getSourceLineNumber();
 350         lexer->pos = getInputFilePosition();
 351         while (lexer->cur_c != EOF)
 352         {
 353                 if (isWhitespace(lexer->cur_c))
 354                 {
 355                         scanWhitespace(lexer);
 356                         have_whitespace = TRUE;
 357                 }
 358                 else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
 359                 {
 360                         scanComments(lexer);
 361                         have_whitespace = TRUE;
 362                 }
 363                 else
 364                 {
 365                         if (have_whitespace && !skip_whitspace)
 366                                 return lexer->cur_token = TOKEN_WHITESPACE;
 367                         break;
 368                 }
 369         }
 370         lexer->line = getSourceLineNumber();
 371         lexer->pos = getInputFilePosition();
 372         while (lexer->cur_c != EOF)
 373         {
 374                 if (lexer->cur_c == '"')
 375                 {
 376                         scanString(lexer);
 377                         return lexer->cur_token = TOKEN_STRING;
 378                 }
 379                 else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
 380                 {
 381                         scanRawString(lexer);
 382                         return lexer->cur_token = TOKEN_STRING;
 383                 }
 384                 else if (lexer->cur_c == '\'')
 385                 {
 386                         scanCharacterOrLifetime(lexer);
 387                         return lexer->cur_token = TOKEN_STRING;
 388                 }
 389                 else if (isIdentifierStart(lexer->cur_c))
 390                 {
 391                         scanIdentifier(lexer);
 392                         return lexer->cur_token = TOKEN_IDENT;
 393                 }
 394                 /* These shift tokens aren't too important for tag-generation per se,
 395                  * but they confuse the skipUntil code which tracks the <> pairs. */
 396                 else if (lexer->cur_c == '>' && lexer->next_c == '>')
 397                 {
 398                         advanceNChar(lexer, 2);
 399                         return lexer->cur_token = TOKEN_RSHIFT;
 400                 }
 401                 else if (lexer->cur_c == '<' && lexer->next_c == '<')
 402                 {
 403                         advanceNChar(lexer, 2);
 404                         return lexer->cur_token = TOKEN_LSHIFT;
 405                 }
 406                 else if (lexer->cur_c == '-' && lexer->next_c == '>')
 407                 {
 408                         advanceNChar(lexer, 2);
 409                         return lexer->cur_token = TOKEN_RARROW;
 410                 }
 411                 else
 412                 {
 413                         int c = lexer->cur_c;
 414                         advanceChar(lexer);
 415                         return lexer->cur_token = c;
 416                 }
 417         }
 418         return lexer->cur_token = TOKEN_EOF;
 419 }
 420
 421 static void initLexer (lexerState *lexer)
 422 {
 423         advanceNChar(lexer, 2);
 424         lexer->token_str = vStringNew();
 425
 426         if (lexer->cur_c == '#' && lexer->next_c == '!')
 427                 scanComments(lexer);
 428         advanceToken(lexer, TRUE);
 429 }
 430
 431 static void deInitLexer (lexerState *lexer)
 432 {
 433         vStringDelete(lexer->token_str);
 434         lexer->token_str = NULL;
 435 }
 436
 437 static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
 438 {
 439         if (kind == K_NONE)
 440                 return;
 441         tagEntryInfo tag;
 442         initTagEntry(&tag, ident->buffer);
 443
 444         tag.lineNumber = line;
 445         tag.filePosition = pos;
 446         tag.sourceFileName = getSourceFileName();
 447
 448         tag.kindName = rustKinds[kind].name;
 449         tag.kind = rustKinds[kind].letter;
 450
 451         tag.extensionFields.signature = arg_list;
 452         tag.extensionFields.varType = type;
 453         if (parent_kind != K_NONE)
 454         {
 455                 tag.extensionFields.scope[0] = rustKinds[parent_kind].name;
 456                 tag.extensionFields.scope[1] = scope->buffer;
 457         }
 458         makeTagEntry(&tag);
 459 }
 460
 461 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
 462  * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
 463 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
 464 {
 465         int angle_level = 0;
 466         int paren_level = 0;
 467         int brace_level = 0;
 468         int bracket_level = 0;
 469         while (lexer->cur_token != TOKEN_EOF)
 470         {
 471                 if (angle_level == 0 && paren_level == 0 && brace_level == 0
 472                     && bracket_level == 0)
 473                 {
 474                         int ii = 0;
 475                         for(ii = 0; ii < num_goal_tokens; ii++)
 476                         {
 477                                 if (lexer->cur_token == goal_tokens[ii])
 478                                 {
 479                                         break;
 480                                 }
 481                         }
 482                         if (ii < num_goal_tokens)
 483                                 break;
 484                 }
 485                 switch (lexer->cur_token)
 486                 {
 487                         case '<':
 488                                 angle_level++;
 489                                 break;
 490                         case '(':
 491                                 paren_level++;
 492                                 break;
 493                         case '{':
 494                                 brace_level++;
 495                                 break;
 496                         case '[':
 497                                 bracket_level++;
 498                                 break;
 499                         case '>':
 500                                 angle_level--;
 501                                 break;
 502                         case ')':
 503                                 paren_level--;
 504                                 break;
 505                         case '}':
 506                                 brace_level--;
 507                                 break;
 508                         case ']':
 509                                 bracket_level--;
 510                                 break;
 511                         case TOKEN_RSHIFT:
 512                                 if (angle_level >= 2)
 513                                         angle_level -= 2;
 514                                 break;
 515                         /* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
 516                         default:
 517                                 break;
 518                 }
 519                 /* Has to be after the token switch to catch the case when we start with the initial level token */
 520                 if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
 521                     && bracket_level == 0)
 522                         break;
 523                 advanceToken(lexer, TRUE);
 524         }
 525 }
 526
 527 /* Function format:
 528  * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"*/
 529 static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
 530 {
 531         int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
 532         vString *name;
 533         vString *arg_list;
 534         unsigned long line;
 535         MIOPos pos;
 536         int paren_level = 0;
 537         boolean found_paren = FALSE;
 538         boolean valid_signature = TRUE;
 539
 540         advanceToken(lexer, TRUE);
 541         if (lexer->cur_token != TOKEN_IDENT)
 542                 return;
 543
 544         name = vStringNewCopy(lexer->token_str);
 545         arg_list = vStringNew();
 546
 547         line = lexer->line;
 548         pos = lexer->pos;
 549
 550         advanceToken(lexer, TRUE);
 551
 552         /* HACK: This is a bit coarse as far as what tag entry means by
 553          * 'arglist'... */
 554         while (lexer->cur_token != '{' && lexer->cur_token != ';')
 555         {
 556                 if (lexer->cur_token == '}')
 557                 {
 558                         valid_signature = FALSE;
 559                         break;
 560                 }
 561                 else if (lexer->cur_token == '(')
 562                 {
 563                         found_paren = TRUE;
 564                         paren_level++;
 565                 }
 566                 else if (lexer->cur_token == ')')
 567                 {
 568                         paren_level--;
 569                         if (paren_level < 0)
 570                         {
 571                                 valid_signature = FALSE;
 572                                 break;
 573                         }
 574                 }
 575                 else if (lexer->cur_token == TOKEN_EOF)
 576                 {
 577                         valid_signature = FALSE;
 578                         break;
 579                 }
 580                 writeCurTokenToStr(lexer, arg_list);
 581                 advanceToken(lexer, FALSE);
 582         }
 583         if (!found_paren || paren_level != 0)
 584                 valid_signature = FALSE;
 585
 586         if (valid_signature)
 587         {
 588                 vStringStripTrailing(arg_list);
 589                 addTag(name, NULL, arg_list->buffer, kind, line, pos, scope, parent_kind);
 590                 addToScope(scope, name);
 591                 parseBlock(lexer, TRUE, kind, scope);
 592         }
 593
 594         vStringDelete(name);
 595         vStringDelete(arg_list);
 596 }
 597
 598 /* Mod format:
 599  * "mod" <ident> "{" [<body>] "}"
 600  * "mod" <ident> ";"*/
 601 static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
 602 {
 603         advanceToken(lexer, TRUE);
 604         if (lexer->cur_token != TOKEN_IDENT)
 605                 return;
 606
 607         addTag(lexer->token_str, NULL, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
 608         addToScope(scope, lexer->token_str);
 609
 610         advanceToken(lexer, TRUE);
 611
 612         parseBlock(lexer, TRUE, K_MOD, scope);
 613 }
 614
 615 /* Trait format:
 616  * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
 617  */
 618 static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
 619 {
 620         int goal_tokens[] = {'{'};
 621
 622         advanceToken(lexer, TRUE);
 623         if (lexer->cur_token != TOKEN_IDENT)
 624                 return;
 625
 626         addTag(lexer->token_str, NULL, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
 627         addToScope(scope, lexer->token_str);
 628
 629         advanceToken(lexer, TRUE);
 630
 631         skipUntil(lexer, goal_tokens, 1);
 632
 633         parseBlock(lexer, TRUE, K_TRAIT, scope);
 634 }
 635
 636 /* Skips type blocks of the form <T:T<T>, ...> */
 637 static void skipTypeBlock (lexerState *lexer)
 638 {
 639         if (lexer->cur_token == '<')
 640         {
 641                 skipUntil(lexer, NULL, 0);
 642                 advanceToken(lexer, TRUE);
 643         }
 644 }
 645
 646 /* Essentially grabs the last ident before 'for', '<' and '{', which
 647  * tends to correspond to what we want as the impl tag entry name */
 648 static void parseQualifiedType (lexerState *lexer, vString* name)
 649 {
 650         while (lexer->cur_token != TOKEN_EOF)
 651         {
 652                 if (lexer->cur_token == TOKEN_IDENT)
 653                 {
 654                         if (strcmp(lexer->token_str->buffer, "for") == 0
 655                                 || strcmp(lexer->token_str->buffer, "where") == 0)
 656                                 break;
 657                         vStringClear(name);
 658                         vStringCat(name, lexer->token_str);
 659                 }
 660                 else if (lexer->cur_token == '<' || lexer->cur_token == '{')
 661                 {
 662                         break;
 663                 }
 664                 advanceToken(lexer, TRUE);
 665         }
 666         skipTypeBlock(lexer);
 667 }
 668
 669 /* Impl format:
 670  * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
 671  */
 672 static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
 673 {
 674         unsigned long line;
 675         MIOPos pos;
 676         vString *name;
 677
 678         advanceToken(lexer, TRUE);
 679
 680         line = lexer->line;
 681         pos = lexer->pos;
 682
 683         skipTypeBlock(lexer);
 684
 685         name = vStringNew();
 686
 687         parseQualifiedType(lexer, name);
 688
 689         if (lexer->cur_token == TOKEN_IDENT && strcmp(lexer->token_str->buffer, "for") == 0)
 690         {
 691                 advanceToken(lexer, TRUE);
 692                 parseQualifiedType(lexer, name);
 693         }
 694
 695         addTag(name, NULL, NULL, K_IMPL, line, pos, scope, parent_kind);
 696         addToScope(scope, name);
 697
 698         parseBlock(lexer, TRUE, K_IMPL, scope);
 699
 700         vStringDelete(name);
 701 }
 702
 703 /* Static format:
 704  * "static" ["mut"] <ident>
 705  */
 706 static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
 707 {
 708         advanceToken(lexer, TRUE);
 709         if (lexer->cur_token != TOKEN_IDENT)
 710                 return;
 711         if (strcmp(lexer->token_str->buffer, "mut") == 0)
 712         {
 713                 advanceToken(lexer, TRUE);
 714         }
 715         if (lexer->cur_token != TOKEN_IDENT)
 716                 return;
 717
 718         addTag(lexer->token_str, NULL, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
 719 }
 720
 721 /* Type format:
 722  * "type" <ident>
 723  */
 724 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
 725 {
 726         advanceToken(lexer, TRUE);
 727         if (lexer->cur_token != TOKEN_IDENT)
 728                 return;
 729
 730         addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
 731 }
 732
 733 /* Structs and enums are very similar syntax-wise.
 734  * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
 735  * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
 736  * the enum's definition (e.g. for the type bounds)
 737  *
 738  * Struct/Enum format:
 739  * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
 740  * "struct/enum" <ident>[<type_bounds>] ";"
 741  * */
 742 static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, boolean is_struct)
 743 {
 744         int kind = is_struct ? K_STRUCT : K_ENUM;
 745         int field_kind = is_struct ? K_FIELD : K_VARIANT;
 746         int goal_tokens1[] = {';', '{'};
 747
 748         advanceToken(lexer, TRUE);
 749         if (lexer->cur_token != TOKEN_IDENT)
 750                 return;
 751
 752         addTag(lexer->token_str, NULL, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
 753         addToScope(scope, lexer->token_str);
 754
 755         skipUntil(lexer, goal_tokens1, 2);
 756
 757         if (lexer->cur_token == '{')
 758         {
 759                 vString *field_name = vStringNew();
 760                 while (lexer->cur_token != TOKEN_EOF)
 761                 {
 762                         int goal_tokens2[] = {'}', ','};
 763                         /* Skip attributes. Format:
 764                          * #[..] or #![..]
 765                          * */
 766                         if (lexer->cur_token == '#')
 767                         {
 768                                 advanceToken(lexer, TRUE);
 769                                 if (lexer->cur_token == '!')
 770                                         advanceToken(lexer, TRUE);
 771                                 if (lexer->cur_token == '[')
 772                                 {
 773                                         /* It's an attribute, skip it. */
 774                                         skipUntil(lexer, NULL, 0);
 775                                 }
 776                                 else
 777                                 {
 778                                         /* Something's up with this field, skip to the next one */
 779                                         skipUntil(lexer, goal_tokens2, 2);
 780                                         continue;
 781                                 }
 782                         }
 783                         if (lexer->cur_token == TOKEN_IDENT)
 784                         {
 785                                 if (strcmp(lexer->token_str->buffer, "priv") == 0
 786                                     || strcmp(lexer->token_str->buffer, "pub") == 0)
 787                                 {
 788                                         advanceToken(lexer, TRUE);
 789                                         if (lexer->cur_token != TOKEN_IDENT)
 790                                         {
 791                                                 /* Something's up with this field, skip to the next one */
 792                                                 skipUntil(lexer, goal_tokens2, 2);
 793                                                 continue;
 794                                         }
 795                                 }
 796
 797                                 vStringClear(field_name);
 798                                 vStringCat(field_name, lexer->token_str);
 799                                 addTag(field_name, NULL, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
 800                                 skipUntil(lexer, goal_tokens2, 2);
 801                         }
 802                         if (lexer->cur_token == '}')
 803                         {
 804                                 advanceToken(lexer, TRUE);
 805                                 break;
 806                         }
 807                         advanceToken(lexer, TRUE);
 808                 }
 809                 vStringDelete(field_name);
 810         }
 811 }
 812
 813 /* Skip the body of the macro. Can't use skipUntil here as
 814  * the body of the macro may have arbitrary code which confuses it (e.g.
 815  * bitshift operators/function return arrows) */
 816 static void skipMacro (lexerState *lexer)
 817 {
 818         int level = 0;
 819         int plus_token = 0;
 820         int minus_token = 0;
 821
 822         advanceToken(lexer, TRUE);
 823         switch (lexer->cur_token)
 824         {
 825                 case '(':
 826                         plus_token = '(';
 827                         minus_token = ')';
 828                         break;
 829                 case '{':
 830                         plus_token = '{';
 831                         minus_token = '}';
 832                         break;
 833                 case '[':
 834                         plus_token = '[';
 835                         minus_token = ']';
 836                         break;
 837                 default:
 838                         return;
 839         }
 840
 841         while (lexer->cur_token != TOKEN_EOF)
 842         {
 843                 if (lexer->cur_token == plus_token)
 844                         level++;
 845                 else if (lexer->cur_token == minus_token)
 846                         level--;
 847                 if (level == 0)
 848                         break;
 849                 advanceToken(lexer, TRUE);
 850         }
 851         advanceToken(lexer, TRUE);
 852 }
 853
 854 /*
 855  * Macro rules format:
 856  * "macro_rules" "!" <ident> <macro_body>
 857  */
 858 static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
 859 {
 860         advanceToken(lexer, TRUE);
 861
 862         if (lexer->cur_token != '!')
 863                 return;
 864
 865         advanceToken(lexer, TRUE);
 866
 867         if (lexer->cur_token != TOKEN_IDENT)
 868                 return;
 869
 870         addTag(lexer->token_str, NULL, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
 871
 872         skipMacro(lexer);
 873 }
 874
 875 /*
 876  * Rust is very liberal with nesting, so this function is used pretty much for any block
 877  */
 878 static void parseBlock (lexerState *lexer, boolean delim, int kind, vString *scope)
 879 {
 880         int level = 1;
 881         if (delim)
 882         {
 883                 if (lexer->cur_token != '{')
 884                         return;
 885                 advanceToken(lexer, TRUE);
 886         }
 887         while (lexer->cur_token != TOKEN_EOF)
 888         {
 889                 if (lexer->cur_token == TOKEN_IDENT)
 890                 {
 891                         size_t old_scope_len = vStringLength(scope);
 892                         if (strcmp(lexer->token_str->buffer, "fn") == 0)
 893                         {
 894                                 parseFn(lexer, scope, kind);
 895                         }
 896                         else if(strcmp(lexer->token_str->buffer, "mod") == 0)
 897                         {
 898                                 parseMod(lexer, scope, kind);
 899                         }
 900                         else if(strcmp(lexer->token_str->buffer, "static") == 0)
 901                         {
 902                                 parseStatic(lexer, scope, kind);
 903                         }
 904                         else if(strcmp(lexer->token_str->buffer, "trait") == 0)
 905                         {
 906                                 parseTrait(lexer, scope, kind);
 907                         }
 908                         else if(strcmp(lexer->token_str->buffer, "type") == 0)
 909                         {
 910                                 parseType(lexer, scope, kind);
 911                         }
 912                         else if(strcmp(lexer->token_str->buffer, "impl") == 0)
 913                         {
 914                                 parseImpl(lexer, scope, kind);
 915                         }
 916                         else if(strcmp(lexer->token_str->buffer, "struct") == 0)
 917                         {
 918                                 parseStructOrEnum(lexer, scope, kind, TRUE);
 919                         }
 920                         else if(strcmp(lexer->token_str->buffer, "enum") == 0)
 921                         {
 922                                 parseStructOrEnum(lexer, scope, kind, FALSE);
 923                         }
 924                         else if(strcmp(lexer->token_str->buffer, "macro_rules") == 0)
 925                         {
 926                                 parseMacroRules(lexer, scope, kind);
 927                         }
 928                         else
 929                         {
 930                                 advanceToken(lexer, TRUE);
 931                                 if (lexer->cur_token == '!')
 932                                 {
 933                                         skipMacro(lexer);
 934                                 }
 935                         }
 936                         resetScope(scope, old_scope_len);
 937                 }
 938                 else if (lexer->cur_token == '{')
 939                 {
 940                         level++;
 941                         advanceToken(lexer, TRUE);
 942                 }
 943                 else if (lexer->cur_token == '}')
 944                 {
 945                         level--;
 946                         advanceToken(lexer, TRUE);
 947                 }
 948                 else if (lexer->cur_token == '\'')
 949                 {
 950                         /* Skip over the 'static lifetime, as it confuses the static parser above */
 951                         advanceToken(lexer, TRUE);
 952                         if (lexer->cur_token == TOKEN_IDENT && strcmp(lexer->token_str->buffer, "static") == 0)
 953                                 advanceToken(lexer, TRUE);
 954                 }
 955                 else
 956                 {
 957                         advanceToken(lexer, TRUE);
 958                 }
 959                 if (delim && level <= 0)
 960                         break;
 961         }
 962 }
 963
 964 static void findRustTags (void)
 965 {
 966         lexerState lexer;
 967         vString* scope = vStringNew();
 968         initLexer(&lexer);
 969
 970         parseBlock(&lexer, FALSE, K_NONE, scope);
 971         vStringDelete(scope);
 972
 973         deInitLexer(&lexer);
 974 }
 975
 976 extern parserDefinition *RustParser (void)
 977 {
 978         static const char *const extensions[] = { "rs", NULL };
 979         parserDefinition *def = parserNew ("Rust");
 980         def->kinds = rustKinds;
 981         def->kindCount = KIND_COUNT (rustKinds);
 982         def->extensions = extensions;
 983         def->parser = findRustTags;
 984
 985         return def;
 986 }