tagmanager/ctags/rust.c

   1 /*
   2 *
   3 *   This source code is released for free distribution under the terms of the
   4 *   GNU General Public License.
   5 *
   6 *   This module contains functions for generating tags for Rust files.
   7 */
   8
   9 /*
  10 *   INCLUDE FILES
  11 */
  12 #include "general.h"    /* must always come first */
  13 #include "main.h"
  14
  15 #include <string.h>
  16
  17 #include "keyword.h"
  18 #include "parse.h"
  19 #include "entry.h"
  20 #include "options.h"
  21 #include "read.h"
  22 #include "vstring.h"
  23
  24 /*
  25 *   MACROS
  26 */
  27 #define MAX_STRING_LENGTH 256
  28
  29 /*
  30 *   DATA DECLARATIONS
  31 */
  32
/* Tag kinds emitted by this parser. Each value except K_NONE is used as an
 * index into rustKinds[] (see addTag), so the order of the enumerators must
 * stay in step with the order of the entries in that table. */
typedef enum {
        K_MOD,
        K_STRUCT,
        K_TRAIT,
        K_IMPL,
        K_FN,
        K_ENUM,
        K_TYPE,
        K_STATIC,
        K_MACRO,
        K_FIELD,
        K_VARIANT,
        K_METHOD,
        /* Sentinel with no rustKinds[] entry: addTag emits nothing for this
         * kind, and as a parent kind it means "no enclosing scope". */
        K_NONE
} RustKind;
  48
/* Kind table, indexed by RustKind. addTag reads the `letter` and `name`
 * members of the selected entry; the entries are listed in the same order as
 * the RustKind enumerators (K_MOD first, K_METHOD last).
 * NOTE(review): the leading TRUE is presumably the "enabled" flag and the
 * last string the human-readable description - confirm against kindOption. */
static kindOption rustKinds[] = {
        {TRUE, 'n', "namespace", "module"},
        {TRUE, 's', "struct", "structural type"},
        {TRUE, 'i', "interface", "trait interface"},
        {TRUE, 'c', "class", "implementation"},
        {TRUE, 'f', "function", "Function"},
        {TRUE, 'g', "enum", "Enum"},
        {TRUE, 't', "typedef", "Type Alias"},
        {TRUE, 'v', "variable", "Global variable"},
        {TRUE, 'M', "macro", "Macro Definition"},
        {TRUE, 'm', "field", "A struct field"},
        {TRUE, 'e', "enumerator", "An enum variant"},
        {TRUE, 'F', "method", "A method"},
};
  63
  64 typedef enum {
  65         TOKEN_WHITESPACE,
  66         TOKEN_STRING,
  67         TOKEN_IDENT,
  68         TOKEN_LSHIFT,
  69         TOKEN_RSHIFT,
  70         TOKEN_RARROW,
  71         TOKEN_EOF
  72 } tokenType;
  73
/* Complete state of the lexer: a two-character window onto the input plus
 * the most recently scanned token. */
typedef struct {
        /* Characters */
        int cur_c;              /* character currently being examined (may be EOF) */
        int next_c;             /* one character of lookahead, read by advanceChar */

        /* Tokens */
        int cur_token;          /* a tokenType value, or the raw character for
                                 * unrecognized single-character tokens */
        vString* token_str;     /* text of the current identifier/string token;
                                 * stored length is capped by MAX_STRING_LENGTH */
        unsigned long line;     /* source line recorded by advanceToken */
        MIOPos pos;             /* input file position recorded by advanceToken */
} lexerState;
  85
  86 /*
  87 *   FUNCTION PROTOTYPES
  88 */
  89
/* Forward declaration: the item parsers below (parseFn, parseMod, parseTrait,
 * parseImpl) call parseBlock before its definition appears in this file. */
static void parseBlock (lexerState *lexer, boolean delim, int kind, vString *scope);
  91
  92 /*
  93 *   FUNCTION DEFINITIONS
  94 */
  95
  96 /* Resets the scope string to the old length */
  97 static void resetScope (vString *scope, size_t old_len)
  98 {
  99         scope->length = old_len;
 100         scope->buffer[old_len] = '\0';
 101 }
 102
 103 /* Adds a name to the end of the scope string */
 104 static void addToScope (vString *scope, vString *name)
 105 {
 106         if (vStringLength(scope) > 0)
 107                 vStringCatS(scope, "::");
 108         vStringCat(scope, name);
 109 }
 110
 111 /* Write the lexer's current token to string, taking care of special tokens */
 112 static void writeCurTokenToStr (lexerState *lexer, vString *out_str)
 113 {
 114         switch (lexer->cur_token)
 115         {
 116                 case TOKEN_IDENT:
 117                         vStringCat(out_str, lexer->token_str);
 118                         break;
 119                 case TOKEN_STRING:
 120                         vStringCat(out_str, lexer->token_str);
 121                         break;
 122                 case TOKEN_WHITESPACE:
 123                         vStringPut(out_str, ' ');
 124                         break;
 125                 case TOKEN_LSHIFT:
 126                         vStringCatS(out_str, "<<");
 127                         break;
 128                 case TOKEN_RSHIFT:
 129                         vStringCatS(out_str, ">>");
 130                         break;
 131                 case TOKEN_RARROW:
 132                         vStringCatS(out_str, "->");
 133                         break;
 134                 default:
 135                         vStringPut(out_str, (char) lexer->cur_token);
 136         }
 137 }
 138
 139 /* Reads a character from the file */
 140 static void advanceChar (lexerState *lexer)
 141 {
 142         lexer->cur_c = lexer->next_c;
 143         lexer->next_c = fileGetc();
 144 }
 145
 146 /* Reads N characters from the file */
 147 static void advanceNChar (lexerState *lexer, int n)
 148 {
 149         while (n--)
 150                 advanceChar(lexer);
 151 }
 152
 153 /* Store the current character in lexerState::token_str if there is space
 154  * (set by MAX_STRING_LENGTH), and then read the next character from the file */
 155 static void advanceAndStoreChar (lexerState *lexer)
 156 {
 157         if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH)
 158                 vStringPut(lexer->token_str, (char) lexer->cur_c);
 159         advanceChar(lexer);
 160 }
 161
 162 static boolean isWhitespace (int c)
 163 {
 164         return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 165 }
 166
 167 static boolean isAscii (int c)
 168 {
 169         return (c >= 0) && (c < 0x80);
 170 }
 171
 172 /* This isn't quite right for Unicode identifiers */
 173 static boolean isIdentifierStart (int c)
 174 {
 175         return (isAscii(c) && (isalpha(c) || c == '_')) || !isAscii(c);
 176 }
 177
 178 /* This isn't quite right for Unicode identifiers */
 179 static boolean isIdentifierContinue (int c)
 180 {
 181         return (isAscii(c) && (isalnum(c) || c == '_')) || !isAscii(c);
 182 }
 183
 184 static void scanWhitespace (lexerState *lexer)
 185 {
 186         while (isWhitespace(lexer->cur_c))
 187                 advanceChar(lexer);
 188 }
 189
 190 /* Normal line comments start with two /'s and continue until the next \n
 191  * (potentially after a \r). Additionally, a shebang in the beginning of the
 192  * file also counts as a line comment as long as it is not this sequence: #![ .
 193  * Block comments start with / followed by a * and end with a * followed by a /.
 194  * Unlike in C/C++ they nest. */
 195 static void scanComments (lexerState *lexer)
 196 {
 197         /* // */
 198         if (lexer->next_c == '/')
 199         {
 200                 advanceNChar(lexer, 2);
 201                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 202                         advanceChar(lexer);
 203         }
 204         /* #! */
 205         else if (lexer->next_c == '!')
 206         {
 207                 advanceNChar(lexer, 2);
 208                 /* If it is exactly #![ then it is not a comment, but an attribute */
 209                 if (lexer->cur_c == '[')
 210                         return;
 211                 while (lexer->cur_c != EOF && lexer->cur_c != '\n')
 212                         advanceChar(lexer);
 213         }
 214         /* block comment */
 215         else if (lexer->next_c == '*')
 216         {
 217                 int level = 1;
 218                 advanceNChar(lexer, 2);
 219                 while (lexer->cur_c != EOF && level > 0)
 220                 {
 221                         if (lexer->cur_c == '*' && lexer->next_c == '/')
 222                         {
 223                                 level--;
 224                                 advanceNChar(lexer, 2);
 225                         }
 226                         else if (lexer->cur_c == '/' && lexer->next_c == '*')
 227                         {
 228                                 level++;
 229                                 advanceNChar(lexer, 2);
 230                         }
 231                         else
 232                         {
 233                                 advanceChar(lexer);
 234                         }
 235                 }
 236         }
 237 }
 238
 239 static void scanIdentifier (lexerState *lexer)
 240 {
 241         vStringClear(lexer->token_str);
 242         do
 243         {
 244                 advanceAndStoreChar(lexer);
 245         } while(lexer->cur_c != EOF && isIdentifierContinue(lexer->cur_c));
 246 }
 247
 248 /* Double-quoted strings, we only care about the \" escape. These
 249  * last past the end of the line, so be careful not too store too much
 250  * of them (see MAX_STRING_LENGTH). The only place we look at their
 251  * contents is in the function definitions, and there the valid strings are
 252  * things like "C" and "Rust" */
 253 static void scanString (lexerState *lexer)
 254 {
 255         vStringClear(lexer->token_str);
 256         advanceAndStoreChar(lexer);
 257         while (lexer->cur_c != EOF && lexer->cur_c != '"')
 258         {
 259                 if (lexer->cur_c == '\\' && lexer->next_c == '"')
 260                         advanceAndStoreChar(lexer);
 261                 advanceAndStoreChar(lexer);
 262         }
 263         advanceAndStoreChar(lexer);
 264 }
 265
 266 /* Raw strings look like this: r"" or r##""## where the number of
 267  * hashes must match */
 268 static void scanRawString (lexerState *lexer)
 269 {
 270         size_t num_initial_hashes = 0;
 271         vStringClear(lexer->token_str);
 272         advanceAndStoreChar(lexer);
 273         /* Count how many leading hashes there are */
 274         while (lexer->cur_c == '#')
 275         {
 276                 num_initial_hashes++;
 277                 advanceAndStoreChar(lexer);
 278         }
 279         if (lexer->cur_c != '"')
 280                 return;
 281         advanceAndStoreChar(lexer);
 282         while (lexer->cur_c != EOF)
 283         {
 284                 /* Count how many trailing hashes there are. If the number is equal or more
 285                  * than the number of leading hashes, break. */
 286                 if (lexer->cur_c == '"')
 287                 {
 288                         size_t num_trailing_hashes = 0;
 289                         advanceAndStoreChar(lexer);
 290                         while (lexer->cur_c == '#' && num_trailing_hashes < num_initial_hashes)
 291                         {
 292                                 num_trailing_hashes++;
 293
 294                                 advanceAndStoreChar(lexer);
 295                         }
 296                         if (num_trailing_hashes == num_initial_hashes)
 297                                 break;
 298                 }
 299                 else
 300                 {
 301                         advanceAndStoreChar(lexer);
 302                 }
 303         }
 304 }
 305
 306 /* This deals with character literals: 'n', '\n', '\uFFFF'; and lifetimes:
 307  * 'lifetime. We'll use this approximate regexp for the literals:
 308  * \' \\ [^']+ \' or \' [^'] \' or \' \\ \' \'. Either way, we'll treat this
 309  * token as a string, so it gets preserved as is for function signatures with
 310  * lifetimes. */
 311 static void scanCharacterOrLifetime (lexerState *lexer)
 312 {
 313         vStringClear(lexer->token_str);
 314         advanceAndStoreChar(lexer);
 315
 316         if (lexer->cur_c == '\\')
 317         {
 318                 advanceAndStoreChar(lexer);
 319                 /* The \' \\ \' \' (literally '\'') case */
 320                 if (lexer->cur_c == '\'' && lexer->next_c == '\'')
 321                 {
 322                         advanceAndStoreChar(lexer);
 323                         advanceAndStoreChar(lexer);
 324                 }
 325                 /* The \' \\ [^']+ \' case */
 326                 else
 327                 {
 328                         while (lexer->cur_c != EOF && lexer->cur_c != '\'')
 329                                 advanceAndStoreChar(lexer);
 330                 }
 331         }
 332         /* The \' [^'] \' case */
 333         else if (lexer->cur_c != '\'' && lexer->next_c == '\'')
 334         {
 335                 advanceAndStoreChar(lexer);
 336                 advanceAndStoreChar(lexer);
 337         }
 338         /* Otherwise it is malformed, or a lifetime */
 339 }
 340
 341 /* Advances the parser one token, optionally skipping whitespace
 342  * (otherwise it is concatenated and returned as a single whitespace token).
 343  * Whitespace is needed to properly render function signatures. Unrecognized
 344  * token starts are stored literally, e.g. token may equal to a character '#'. */
 345 static int advanceToken (lexerState *lexer, boolean skip_whitspace)
 346 {
 347         boolean have_whitespace = FALSE;
 348         lexer->line = getSourceLineNumber();
 349         lexer->pos = getInputFilePosition();
 350         while (lexer->cur_c != EOF)
 351         {
 352                 if (isWhitespace(lexer->cur_c))
 353                 {
 354                         scanWhitespace(lexer);
 355                         have_whitespace = TRUE;
 356                 }
 357                 else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
 358                 {
 359                         scanComments(lexer);
 360                         have_whitespace = TRUE;
 361                 }
 362                 else
 363                 {
 364                         if (have_whitespace && !skip_whitspace)
 365                                 return lexer->cur_token = TOKEN_WHITESPACE;
 366                         break;
 367                 }
 368         }
 369         lexer->line = getSourceLineNumber();
 370         lexer->pos = getInputFilePosition();
 371         while (lexer->cur_c != EOF)
 372         {
 373                 if (lexer->cur_c == '"')
 374                 {
 375                         scanString(lexer);
 376                         return lexer->cur_token = TOKEN_STRING;
 377                 }
 378                 else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
 379                 {
 380                         scanRawString(lexer);
 381                         return lexer->cur_token = TOKEN_STRING;
 382                 }
 383                 else if (lexer->cur_c == '\'')
 384                 {
 385                         scanCharacterOrLifetime(lexer);
 386                         return lexer->cur_token = TOKEN_STRING;
 387                 }
 388                 else if (isIdentifierStart(lexer->cur_c))
 389                 {
 390                         scanIdentifier(lexer);
 391                         return lexer->cur_token = TOKEN_IDENT;
 392                 }
 393                 /* These shift tokens aren't too important for tag-generation per se,
 394                  * but they confuse the skipUntil code which tracks the <> pairs. */
 395                 else if (lexer->cur_c == '>' && lexer->next_c == '>')
 396                 {
 397                         advanceNChar(lexer, 2);
 398                         return lexer->cur_token = TOKEN_RSHIFT;
 399                 }
 400                 else if (lexer->cur_c == '<' && lexer->next_c == '<')
 401                 {
 402                         advanceNChar(lexer, 2);
 403                         return lexer->cur_token = TOKEN_LSHIFT;
 404                 }
 405                 else if (lexer->cur_c == '-' && lexer->next_c == '>')
 406                 {
 407                         advanceNChar(lexer, 2);
 408                         return lexer->cur_token = TOKEN_RARROW;
 409                 }
 410                 else
 411                 {
 412                         int c = lexer->cur_c;
 413                         advanceChar(lexer);
 414                         return lexer->cur_token = c;
 415                 }
 416         }
 417         return lexer->cur_token = TOKEN_EOF;
 418 }
 419
 420 static void initLexer (lexerState *lexer)
 421 {
 422         advanceNChar(lexer, 2);
 423         lexer->token_str = vStringNew();
 424
 425         if (lexer->cur_c == '#' && lexer->next_c == '!')
 426                 scanComments(lexer);
 427         advanceToken(lexer, TRUE);
 428 }
 429
 430 static void deInitLexer (lexerState *lexer)
 431 {
 432         vStringDelete(lexer->token_str);
 433         lexer->token_str = NULL;
 434 }
 435
 436 static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind)
 437 {
 438         if (kind == K_NONE)
 439                 return;
 440         tagEntryInfo tag;
 441         initTagEntry(&tag, ident->buffer);
 442
 443         tag.lineNumber = line;
 444         tag.filePosition = pos;
 445         tag.sourceFileName = getSourceFileName();
 446
 447         tag.kindName = rustKinds[kind].name;
 448         tag.kind = rustKinds[kind].letter;
 449
 450         tag.extensionFields.arglist = arg_list;
 451         tag.extensionFields.varType = type;
 452         if (parent_kind != K_NONE)
 453         {
 454                 tag.extensionFields.scope[0] = rustKinds[parent_kind].name;
 455                 tag.extensionFields.scope[1] = scope->buffer;
 456         }
 457         makeTagEntry(&tag);
 458 }
 459
 460 /* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens.
 461  * Keeps track of balanced <>'s, ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */
 462 static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens)
 463 {
 464         int angle_level = 0;
 465         int paren_level = 0;
 466         int brace_level = 0;
 467         int bracket_level = 0;
 468         while (lexer->cur_token != TOKEN_EOF)
 469         {
 470                 if (angle_level == 0 && paren_level == 0 && brace_level == 0
 471                     && bracket_level == 0)
 472                 {
 473                         int ii = 0;
 474                         for(ii = 0; ii < num_goal_tokens; ii++)
 475                         {
 476                                 if (lexer->cur_token == goal_tokens[ii])
 477                                 {
 478                                         break;
 479                                 }
 480                         }
 481                         if (ii < num_goal_tokens)
 482                                 break;
 483                 }
 484                 switch (lexer->cur_token)
 485                 {
 486                         case '<':
 487                                 angle_level++;
 488                                 break;
 489                         case '(':
 490                                 paren_level++;
 491                                 break;
 492                         case '{':
 493                                 brace_level++;
 494                                 break;
 495                         case '[':
 496                                 bracket_level++;
 497                                 break;
 498                         case '>':
 499                                 angle_level--;
 500                                 break;
 501                         case ')':
 502                                 paren_level--;
 503                                 break;
 504                         case '}':
 505                                 brace_level--;
 506                                 break;
 507                         case ']':
 508                                 bracket_level--;
 509                                 break;
 510                         case TOKEN_RSHIFT:
 511                                 if (angle_level >= 2)
 512                                         angle_level -= 2;
 513                                 break;
 514                         /* TOKEN_LSHIFT is never interpreted as two <'s in valid Rust code */
 515                         default:
 516                                 break;
 517                 }
 518                 /* Has to be after the token switch to catch the case when we start with the initial level token */
 519                 if (num_goal_tokens == 0 && angle_level == 0 && paren_level == 0 && brace_level == 0
 520                     && bracket_level == 0)
 521                         break;
 522                 advanceToken(lexer, TRUE);
 523         }
 524 }
 525
 526 /* Function format:
 527  * "fn" <ident>[<type_bounds>] "(" [<args>] ")" ["->" <ret_type>] "{" [<body>] "}"*/
 528 static void parseFn (lexerState *lexer, vString *scope, int parent_kind)
 529 {
 530         int kind = (parent_kind == K_TRAIT || parent_kind == K_IMPL) ? K_METHOD : K_FN;
 531         vString *name;
 532         vString *arg_list;
 533         unsigned long line;
 534         MIOPos pos;
 535         int paren_level = 0;
 536         boolean found_paren = FALSE;
 537         boolean valid_signature = TRUE;
 538
 539         advanceToken(lexer, TRUE);
 540         if (lexer->cur_token != TOKEN_IDENT)
 541                 return;
 542
 543         name = vStringNewCopy(lexer->token_str);
 544         arg_list = vStringNew();
 545
 546         line = lexer->line;
 547         pos = lexer->pos;
 548
 549         advanceToken(lexer, TRUE);
 550
 551         /* HACK: This is a bit coarse as far as what tag entry means by
 552          * 'arglist'... */
 553         while (lexer->cur_token != '{' && lexer->cur_token != ';')
 554         {
 555                 if (lexer->cur_token == '}')
 556                 {
 557                         valid_signature = FALSE;
 558                         break;
 559                 }
 560                 else if (lexer->cur_token == '(')
 561                 {
 562                         found_paren = TRUE;
 563                         paren_level++;
 564                 }
 565                 else if (lexer->cur_token == ')')
 566                 {
 567                         paren_level--;
 568                         if (paren_level < 0)
 569                         {
 570                                 valid_signature = FALSE;
 571                                 break;
 572                         }
 573                 }
 574                 else if (lexer->cur_token == TOKEN_EOF)
 575                 {
 576                         valid_signature = FALSE;
 577                         break;
 578                 }
 579                 writeCurTokenToStr(lexer, arg_list);
 580                 advanceToken(lexer, FALSE);
 581         }
 582         if (!found_paren || paren_level != 0)
 583                 valid_signature = FALSE;
 584
 585         if (valid_signature)
 586         {
 587                 vStringStripTrailing(arg_list);
 588                 addTag(name, NULL, arg_list->buffer, kind, line, pos, scope, parent_kind);
 589                 addToScope(scope, name);
 590                 parseBlock(lexer, TRUE, kind, scope);
 591         }
 592
 593         vStringDelete(name);
 594         vStringDelete(arg_list);
 595 }
 596
 597 /* Mod format:
 598  * "mod" <ident> "{" [<body>] "}"
 599  * "mod" <ident> ";"*/
 600 static void parseMod (lexerState *lexer, vString *scope, int parent_kind)
 601 {
 602         advanceToken(lexer, TRUE);
 603         if (lexer->cur_token != TOKEN_IDENT)
 604                 return;
 605
 606         addTag(lexer->token_str, NULL, NULL, K_MOD, lexer->line, lexer->pos, scope, parent_kind);
 607         addToScope(scope, lexer->token_str);
 608
 609         advanceToken(lexer, TRUE);
 610
 611         parseBlock(lexer, TRUE, K_MOD, scope);
 612 }
 613
 614 /* Trait format:
 615  * "trait" <ident> [<type_bounds>] "{" [<body>] "}"
 616  */
 617 static void parseTrait (lexerState *lexer, vString *scope, int parent_kind)
 618 {
 619         int goal_tokens[] = {'{'};
 620
 621         advanceToken(lexer, TRUE);
 622         if (lexer->cur_token != TOKEN_IDENT)
 623                 return;
 624
 625         addTag(lexer->token_str, NULL, NULL, K_TRAIT, lexer->line, lexer->pos, scope, parent_kind);
 626         addToScope(scope, lexer->token_str);
 627
 628         advanceToken(lexer, TRUE);
 629
 630         skipUntil(lexer, goal_tokens, 1);
 631
 632         parseBlock(lexer, TRUE, K_TRAIT, scope);
 633 }
 634
 635 /* Skips type blocks of the form <T:T<T>, ...> */
 636 static void skipTypeBlock (lexerState *lexer)
 637 {
 638         if (lexer->cur_token == '<')
 639         {
 640                 skipUntil(lexer, NULL, 0);
 641                 advanceToken(lexer, TRUE);
 642         }
 643 }
 644
 645 /* Essentially grabs the last ident before 'for', '<' and '{', which
 646  * tends to correspond to what we want as the impl tag entry name */
 647 static void parseQualifiedType (lexerState *lexer, vString* name)
 648 {
 649         while (lexer->cur_token != TOKEN_EOF)
 650         {
 651                 if (lexer->cur_token == TOKEN_IDENT)
 652                 {
 653                         if (strcmp(lexer->token_str->buffer, "for") == 0)
 654                                 break;
 655                         vStringClear(name);
 656                         vStringCat(name, lexer->token_str);
 657                 }
 658                 else if (lexer->cur_token == '<' || lexer->cur_token == '{')
 659                 {
 660                         break;
 661                 }
 662                 advanceToken(lexer, TRUE);
 663         }
 664         skipTypeBlock(lexer);
 665 }
 666
 667 /* Impl format:
 668  * "impl" [<type_bounds>] <qualified_ident>[<type_bounds>] ["for" <qualified_ident>[<type_bounds>]] "{" [<body>] "}"
 669  */
 670 static void parseImpl (lexerState *lexer, vString *scope, int parent_kind)
 671 {
 672         unsigned long line;
 673         MIOPos pos;
 674         vString *name;
 675
 676         advanceToken(lexer, TRUE);
 677
 678         line = lexer->line;
 679         pos = lexer->pos;
 680
 681         skipTypeBlock(lexer);
 682
 683         name = vStringNew();
 684
 685         parseQualifiedType(lexer, name);
 686
 687         if (lexer->cur_token == TOKEN_IDENT && strcmp(lexer->token_str->buffer, "for") == 0)
 688         {
 689                 advanceToken(lexer, TRUE);
 690                 parseQualifiedType(lexer, name);
 691         }
 692
 693         addTag(name, NULL, NULL, K_IMPL, line, pos, scope, parent_kind);
 694         addToScope(scope, name);
 695
 696         parseBlock(lexer, TRUE, K_IMPL, scope);
 697
 698         vStringDelete(name);
 699 }
 700
 701 /* Static format:
 702  * "static" ["mut"] <ident>
 703  */
 704 static void parseStatic (lexerState *lexer, vString *scope, int parent_kind)
 705 {
 706         advanceToken(lexer, TRUE);
 707         if (lexer->cur_token != TOKEN_IDENT)
 708                 return;
 709         if (strcmp(lexer->token_str->buffer, "mut") == 0)
 710         {
 711                 advanceToken(lexer, TRUE);
 712         }
 713         if (lexer->cur_token != TOKEN_IDENT)
 714                 return;
 715
 716         addTag(lexer->token_str, NULL, NULL, K_STATIC, lexer->line, lexer->pos, scope, parent_kind);
 717 }
 718
 719 /* Type format:
 720  * "type" <ident>
 721  */
 722 static void parseType (lexerState *lexer, vString *scope, int parent_kind)
 723 {
 724         advanceToken(lexer, TRUE);
 725         if (lexer->cur_token != TOKEN_IDENT)
 726                 return;
 727
 728         addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind);
 729 }
 730
 731 /* Structs and enums are very similar syntax-wise.
 732  * It is possible to parse variants a bit more cleverly (e.g. make tuple variants functions and
 733  * struct variants structs) but it'd be too clever and the signature wouldn't make too much sense without
 734  * the enum's definition (e.g. for the type bounds)
 735  *
 736  * Struct/Enum format:
 737  * "struct/enum" <ident>[<type_bounds>] "{" [<ident>,]+ "}"
 738  * "struct/enum" <ident>[<type_bounds>] ";"
 739  * */
 740 static void parseStructOrEnum (lexerState *lexer, vString *scope, int parent_kind, boolean is_struct)
 741 {
 742         int kind = is_struct ? K_STRUCT : K_ENUM;
 743         int field_kind = is_struct ? K_FIELD : K_VARIANT;
 744         int goal_tokens1[] = {';', '{'};
 745
 746         advanceToken(lexer, TRUE);
 747         if (lexer->cur_token != TOKEN_IDENT)
 748                 return;
 749
 750         addTag(lexer->token_str, NULL, NULL, kind, lexer->line, lexer->pos, scope, parent_kind);
 751         addToScope(scope, lexer->token_str);
 752
 753         skipUntil(lexer, goal_tokens1, 2);
 754
 755         if (lexer->cur_token == '{')
 756         {
 757                 vString *field_name = vStringNew();
 758                 while (lexer->cur_token != TOKEN_EOF)
 759                 {
 760                         int goal_tokens2[] = {'}', ','};
 761                         /* Skip attributes. Format:
 762                          * #[..] or #![..]
 763                          * */
 764                         if (lexer->cur_token == '#')
 765                         {
 766                                 advanceToken(lexer, TRUE);
 767                                 if (lexer->cur_token == '!')
 768                                         advanceToken(lexer, TRUE);
 769                                 if (lexer->cur_token == '[')
 770                                 {
 771                                         /* It's an attribute, skip it. */
 772                                         skipUntil(lexer, NULL, 0);
 773                                 }
 774                                 else
 775                                 {
 776                                         /* Something's up with this field, skip to the next one */
 777                                         skipUntil(lexer, goal_tokens2, 2);
 778                                         continue;
 779                                 }
 780                         }
 781                         if (lexer->cur_token == TOKEN_IDENT)
 782                         {
 783                                 if (strcmp(lexer->token_str->buffer, "priv") == 0
 784                                     || strcmp(lexer->token_str->buffer, "pub") == 0)
 785                                 {
 786                                         advanceToken(lexer, TRUE);
 787                                         if (lexer->cur_token != TOKEN_IDENT)
 788                                         {
 789                                                 /* Something's up with this field, skip to the next one */
 790                                                 skipUntil(lexer, goal_tokens2, 2);
 791                                                 continue;
 792                                         }
 793                                 }
 794
 795                                 vStringClear(field_name);
 796                                 vStringCat(field_name, lexer->token_str);
 797                                 addTag(field_name, NULL, NULL, field_kind, lexer->line, lexer->pos, scope, kind);
 798                                 skipUntil(lexer, goal_tokens2, 2);
 799                         }
 800                         if (lexer->cur_token == '}')
 801                         {
 802                                 advanceToken(lexer, TRUE);
 803                                 break;
 804                         }
 805                         advanceToken(lexer, TRUE);
 806                 }
 807                 vStringDelete(field_name);
 808         }
 809 }
 810
 811 /* Skip the body of the macro. Can't use skipUntil here as
 812  * the body of the macro may have arbitrary code which confuses it (e.g.
 813  * bitshift operators/function return arrows) */
 814 static void skipMacro (lexerState *lexer)
 815 {
 816         int level = 0;
 817         int plus_token = 0;
 818         int minus_token = 0;
 819
 820         advanceToken(lexer, TRUE);
 821         switch (lexer->cur_token)
 822         {
 823                 case '(':
 824                         plus_token = '(';
 825                         minus_token = ')';
 826                         break;
 827                 case '{':
 828                         plus_token = '{';
 829                         minus_token = '}';
 830                         break;
 831                 case '[':
 832                         plus_token = '[';
 833                         minus_token = ']';
 834                         break;
 835                 default:
 836                         return;
 837         }
 838
 839         while (lexer->cur_token != TOKEN_EOF)
 840         {
 841                 if (lexer->cur_token == plus_token)
 842                         level++;
 843                 else if (lexer->cur_token == minus_token)
 844                         level--;
 845                 if (level == 0)
 846                         break;
 847                 advanceToken(lexer, TRUE);
 848         }
 849         advanceToken(lexer, TRUE);
 850 }
 851
 852 /*
 853  * Macro rules format:
 854  * "macro_rules" "!" <ident> <macro_body>
 855  */
 856 static void parseMacroRules (lexerState *lexer, vString *scope, int parent_kind)
 857 {
 858         advanceToken(lexer, TRUE);
 859
 860         if (lexer->cur_token != '!')
 861                 return;
 862
 863         advanceToken(lexer, TRUE);
 864
 865         if (lexer->cur_token != TOKEN_IDENT)
 866                 return;
 867
 868         addTag(lexer->token_str, NULL, NULL, K_MACRO, lexer->line, lexer->pos, scope, parent_kind);
 869
 870         skipMacro(lexer);
 871 }
 872
 873 /*
 874  * Rust is very liberal with nesting, so this function is used pretty much for any block
 875  */
 876 static void parseBlock (lexerState *lexer, boolean delim, int kind, vString *scope)
 877 {
 878         int level = 1;
 879         if (delim)
 880         {
 881                 if (lexer->cur_token != '{')
 882                         return;
 883                 advanceToken(lexer, TRUE);
 884         }
 885         while (lexer->cur_token != TOKEN_EOF)
 886         {
 887                 if (lexer->cur_token == TOKEN_IDENT)
 888                 {
 889                         size_t old_scope_len = vStringLength(scope);
 890                         if (strcmp(lexer->token_str->buffer, "fn") == 0)
 891                         {
 892                                 parseFn(lexer, scope, kind);
 893                         }
 894                         else if(strcmp(lexer->token_str->buffer, "mod") == 0)
 895                         {
 896                                 parseMod(lexer, scope, kind);
 897                         }
 898                         else if(strcmp(lexer->token_str->buffer, "static") == 0)
 899                         {
 900                                 parseStatic(lexer, scope, kind);
 901                         }
 902                         else if(strcmp(lexer->token_str->buffer, "trait") == 0)
 903                         {
 904                                 parseTrait(lexer, scope, kind);
 905                         }
 906                         else if(strcmp(lexer->token_str->buffer, "type") == 0)
 907                         {
 908                                 parseType(lexer, scope, kind);
 909                         }
 910                         else if(strcmp(lexer->token_str->buffer, "impl") == 0)
 911                         {
 912                                 parseImpl(lexer, scope, kind);
 913                         }
 914                         else if(strcmp(lexer->token_str->buffer, "struct") == 0)
 915                         {
 916                                 parseStructOrEnum(lexer, scope, kind, TRUE);
 917                         }
 918                         else if(strcmp(lexer->token_str->buffer, "enum") == 0)
 919                         {
 920                                 parseStructOrEnum(lexer, scope, kind, FALSE);
 921                         }
 922                         else if(strcmp(lexer->token_str->buffer, "macro_rules") == 0)
 923                         {
 924                                 parseMacroRules(lexer, scope, kind);
 925                         }
 926                         else
 927                         {
 928                                 advanceToken(lexer, TRUE);
 929                                 if (lexer->cur_token == '!')
 930                                 {
 931                                         skipMacro(lexer);
 932                                 }
 933                         }
 934                         resetScope(scope, old_scope_len);
 935                 }
 936                 else if (lexer->cur_token == '{')
 937                 {
 938                         level++;
 939                         advanceToken(lexer, TRUE);
 940                 }
 941                 else if (lexer->cur_token == '}')
 942                 {
 943                         level--;
 944                         advanceToken(lexer, TRUE);
 945                 }
 946                 else if (lexer->cur_token == '\'')
 947                 {
 948                         /* Skip over the 'static lifetime, as it confuses the static parser above */
 949                         advanceToken(lexer, TRUE);
 950                         if (lexer->cur_token == TOKEN_IDENT && strcmp(lexer->token_str->buffer, "static") == 0)
 951                                 advanceToken(lexer, TRUE);
 952                 }
 953                 else
 954                 {
 955                         advanceToken(lexer, TRUE);
 956                 }
 957                 if (delim && level <= 0)
 958                         break;
 959         }
 960 }
 961
 962 static void findRustTags (void)
 963 {
 964         lexerState lexer;
 965         vString* scope = vStringNew();
 966         initLexer(&lexer);
 967
 968         parseBlock(&lexer, FALSE, K_NONE, scope);
 969         vStringDelete(scope);
 970
 971         deInitLexer(&lexer);
 972 }
 973
 974 extern parserDefinition *RustParser (void)
 975 {
 976         static const char *const extensions[] = { "rs", NULL };
 977         parserDefinition *def = parserNew ("Rust");
 978         def->kinds = rustKinds;
 979         def->kindCount = KIND_COUNT (rustKinds);
 980         def->extensions = extensions;
 981         def->parser = findRustTags;
 982
 983         return def;
 984 }