gcc/rust/expand/rust-macro-expand.cc

   1 // Copyright (C) 2020-2024 Free Software Foundation, Inc.
   2
   3 // This file is part of GCC.
   4
   5 // GCC is free software; you can redistribute it and/or modify it under
   6 // the terms of the GNU General Public License as published by the Free
   7 // Software Foundation; either version 3, or (at your option) any later
   8 // version.
   9
  10 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // for more details.
  14
  15 // You should have received a copy of the GNU General Public License
  16 // along with GCC; see the file COPYING3.  If not see
  17 // <http://www.gnu.org/licenses/>.
  18
  19 #include "rust-macro-expand.h"
  20 #include "rust-macro-substitute-ctx.h"
  21 #include "rust-ast-full.h"
  22 #include "rust-ast-visitor.h"
  23 #include "rust-diagnostics.h"
  24 #include "rust-parse.h"
  25 #include "rust-attribute-visitor.h"
  26 #include "rust-early-name-resolver.h"
  27
  28 namespace Rust {
  29 AST::Fragment
  30 MacroExpander::expand_decl_macro (Location invoc_locus,
  31                                   AST::MacroInvocData &invoc,
  32                                   AST::MacroRulesDefinition &rules_def,
  33                                   bool semicolon)
  34 {
  35   // ensure that both invocation and rules are in a valid state
  36   rust_assert (!invoc.is_marked_for_strip ());
  37   rust_assert (!rules_def.is_marked_for_strip ());
  38   rust_assert (rules_def.get_macro_rules ().size () > 0);
  39
  40   /* probably something here about parsing invoc and rules def token trees to
  41    * token stream. if not, how would parser handle the captures of exprs and
  42    * stuff? on the other hand, token trees may be kind of useful in rules def as
  43    * creating a point where recursion can occur (like having
  44    * "compare_macro_match" and then it calling itself when it finds delimiters)
  45    */
  46
  47   /* find matching rule to invoc token tree, based on macro rule's matcher. if
  48    * none exist, error.
  49    * - specifically, check each matcher in order. if one fails to match, move
  50    * onto next. */
  51   /* TODO: does doing this require parsing expressions and whatever in the
  52    * invoc? if so, might as well save the results if referenced using $ or
  53    * whatever. If not, do another pass saving them. Except this is probably
  54    * useless as different rules could have different starting points for exprs
  55    * or whatever. Decision trees could avoid this, but they have their own
  56    * issues. */
  57   /* TODO: will need to modify the parser so that it can essentially "catch"
  58    * errors - maybe "try_parse_expr" or whatever methods. */
  59   // this technically creates a back-tracking parser - this will be the
  60   // implementation style
  61
  62   /* then, after results are saved, generate the macro output from the
  63    * transcriber token tree. if i understand this correctly, the macro
  64    * invocation gets replaced by the transcriber tokens, except with
  65    * substitutions made (e.g. for $i variables) */
  66
  67   /* TODO: it is probably better to modify AST::Token to store a pointer to a
  68    * Lexer::Token (rather than being converted) - i.e. not so much have
  69    * AST::Token as a Token but rather a TokenContainer (as it is another type of
  70    * TokenTree). This will prevent re-conversion of Tokens between each type
  71    * all the time, while still allowing the heterogenous storage of token trees.
  72    */
  73
  74   AST::DelimTokenTree &invoc_token_tree = invoc.get_delim_tok_tree ();
  75
  76   // find matching arm
  77   AST::MacroRule *matched_rule = nullptr;
  78   std::map<std::string, MatchedFragmentContainer> matched_fragments;
  79   for (auto &rule : rules_def.get_rules ())
  80     {
  81       sub_stack.push ();
  82       bool did_match_rule = try_match_rule (rule, invoc_token_tree);
  83       matched_fragments = sub_stack.pop ();
  84
  85       if (did_match_rule)
  86         {
  87           //  // Debugging
  88           //  for (auto &kv : matched_fragments)
  89           //    rust_debug ("[fragment]: %s (%ld - %s)", kv.first.c_str (),
  90           //            kv.second.get_fragments ().size (),
  91           //            kv.second.get_kind ()
  92           //                == MatchedFragmentContainer::Kind::Repetition
  93           //              ? "repetition"
  94           //              : "metavar");
  95
  96           matched_rule = &rule;
  97           break;
  98         }
  99     }
 100
 101   if (matched_rule == nullptr)
 102     {
 103       RichLocation r (invoc_locus);
 104       r.add_range (rules_def.get_locus ());
 105       rust_error_at (r, "Failed to match any rule within macro");
 106       return AST::Fragment::create_error ();
 107     }
 108
 109   return transcribe_rule (*matched_rule, invoc_token_tree, matched_fragments,
 110                           semicolon, peek_context ());
 111 }
 112
 113 void
 114 MacroExpander::expand_eager_invocations (AST::MacroInvocation &invoc)
 115 {
 116   if (invoc.get_pending_eager_invocations ().empty ())
 117     return;
 118
 119   // We have to basically create a new delimited token tree which contains the
 120   // result of one step of expansion. In the case of builtin macros called with
 121   // other macro invocations, such as `concat!("h", 'a', a!())`, we need to
 122   // expand `a!()` before expanding the concat macro.
 123   // This will, ideally, give us a new token tree containing the various
 124   // existing tokens + the result of the expansion of a!().
 125   // To do this, we "parse" the given token tree to find anything that "looks
 126   // like a macro invocation". Then, we get the corresponding macro invocation
 127   // from the `pending_eager_invocations` vector and expand it.
 128   // Because the `pending_eager_invocations` vector is created in the same order
 129   // that the DelimTokenTree is parsed, we know that the first macro invocation
 130   // within the DelimTokenTree corresponds to the first element in
 131   // `pending_eager_invocations`. The idea is thus to:
 132   // 1. Find a macro invocation in the token tree, noting the index of the start
 133   //    token and of the end token
 134   // 2. Get its associated invocation in `pending_eager_invocations`
 135   // 3. Expand that element
 136   // 4. Get the token tree associated with that AST fragment
 137   // 5. Replace the original tokens corresponding to the invocation with the new
 138   //    tokens from the fragment
 139   // pseudo-code:
 140   //
 141   // i = 0;
 142   // for tok in dtt:
 143   //   if tok is identifier && tok->next() is !:
 144   //     start = index(tok);
 145   //     l_delim = tok->next()->next();
 146   //     tok = skip_until_r_delim();
 147   //     end = index(tok);
 148   //
 149   //     new_tt = expand_eager_invoc(eagers[i++]);
 150   //     old_tt[start..end] = new_tt;
 151
 152   auto dtt = invoc.get_invoc_data ().get_delim_tok_tree ();
 153   auto stream = dtt.to_token_stream ();
 154   std::vector<std::unique_ptr<AST::TokenTree>> new_stream;
 155   size_t current_pending = 0;
 156
 157   // we need to create a clone of the delimited token tree as the lexer
 158   // expects ownership of the tokens
 159   std::vector<std::unique_ptr<Rust::AST::Token>> dtt_clone;
 160   for (auto &tok : stream)
 161     dtt_clone.emplace_back (tok->clone_token ());
 162
 163   MacroInvocLexer lex (std::move (dtt_clone));
 164   Parser<MacroInvocLexer> parser (lex);
 165
 166   // we want to build a substitution map - basically, associating a `start` and
 167   // `end` index for each of the pending macro invocations
 168   std::map<std::pair<size_t, size_t>, std::unique_ptr<AST::MacroInvocation> &>
 169     substitution_map;
 170
 171   for (size_t i = 0; i < stream.size (); i++)
 172     {
 173       // FIXME: Can't these offsets be figure out when we actually parse the
 174       // pending_eager_invocation in the first place?
 175       auto invocation = parser.parse_macro_invocation ({});
 176
 177       // if we've managed to parse a macro invocation, we look at the current
 178       // offset and store them in the substitution map. Otherwise, we skip one
 179       // token and try parsing again
 180       if (invocation)
 181         substitution_map.insert (
 182           {{i, parser.get_token_source ().get_offs ()},
 183            invoc.get_pending_eager_invocations ()[current_pending++]});
 184       else
 185         parser.skip_token (stream[i]->get_id ());
 186     }
 187
 188   size_t current_idx = 0;
 189   for (auto kv : substitution_map)
 190     {
 191       auto &to_expand = kv.second;
 192       expand_invoc (*to_expand, false);
 193
 194       auto fragment = take_expanded_fragment ();
 195       auto &new_tokens = fragment.get_tokens ();
 196
 197       auto start = kv.first.first;
 198       auto end = kv.first.second;
 199
 200       // We're now going to re-add the tokens to the invocation's token tree.
 201       // 1. Basically, what we want to do is insert all tokens up until the
 202       //    beginning of the macro invocation (start).
 203       // 2. Then, we'll insert all of the tokens resulting from the macro
 204       //    expansion: These are in `new_tokens`.
 205       // 3. Finally, we'll do that again from
 206       //    the end of macro and go back to 1.
 207
 208       for (size_t i = current_idx; i < start; i++)
 209         new_stream.emplace_back (stream[i]->clone_token ());
 210
 211       for (auto &tok : new_tokens)
 212         new_stream.emplace_back (tok->clone_token ());
 213
 214       current_idx = end;
 215     }
 216
 217   // Once all of that is done, we copy the last remaining tokens from the
 218   // original stream
 219   for (size_t i = current_idx; i < stream.size (); i++)
 220     new_stream.emplace_back (stream[i]->clone_token ());
 221
 222   auto new_dtt
 223     = AST::DelimTokenTree (dtt.get_delim_type (), std::move (new_stream));
 224
 225   invoc.get_pending_eager_invocations ().clear ();
 226   invoc.get_invoc_data ().set_delim_tok_tree (new_dtt);
 227 }
 228
 229 void
 230 MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon)
 231 {
 232   if (depth_exceeds_recursion_limit ())
 233     {
 234       rust_error_at (invoc.get_locus (), "reached recursion limit");
 235       return;
 236     }
 237
 238   if (invoc.get_kind () == AST::MacroInvocation::InvocKind::Builtin)
 239     expand_eager_invocations (invoc);
 240
 241   AST::MacroInvocData &invoc_data = invoc.get_invoc_data ();
 242
 243   // ??
 244   // switch on type of macro:
 245   //  - '!' syntax macro (inner switch)
 246   //      - procedural macro - "A token-based function-like macro"
 247   //      - 'macro_rules' (by example/pattern-match) macro? or not? "an
 248   // AST-based function-like macro"
 249   //      - else is unreachable
 250   //  - attribute syntax macro (inner switch)
 251   //  - procedural macro attribute syntax - "A token-based attribute
 252   // macro"
 253   //      - legacy macro attribute syntax? - "an AST-based attribute macro"
 254   //      - non-macro attribute: mark known
 255   //      - else is unreachable
 256   //  - derive macro (inner switch)
 257   //      - derive or legacy derive - "token-based" vs "AST-based"
 258   //      - else is unreachable
 259   //  - derive container macro - unreachable
 260
 261   auto fragment = AST::Fragment::create_error ();
 262   invoc_data.set_expander (this);
 263
 264   // lookup the rules
 265   AST::MacroRulesDefinition *rules_def = nullptr;
 266   bool ok = mappings->lookup_macro_invocation (invoc, &rules_def);
 267
 268   // If there's no rule associated with the invocation, we can simply return
 269   // early. The early name resolver will have already emitted an error.
 270   if (!ok)
 271     return;
 272
 273   // We store the last expanded invocation and macro definition for error
 274   // reporting in case the recursion limit is reached
 275   last_invoc = &invoc;
 276   last_def = rules_def;
 277
 278   if (rules_def->is_builtin ())
 279     fragment
 280       = rules_def->get_builtin_transcriber () (invoc.get_locus (), invoc_data);
 281   else
 282     fragment = expand_decl_macro (invoc.get_locus (), invoc_data, *rules_def,
 283                                   has_semicolon);
 284
 285   set_expanded_fragment (std::move (fragment));
 286 }
 287
 288 /* Determines whether any cfg predicate is false and hence item with attributes
 289  * should be stripped. Note that attributes must be expanded before calling. */
 290 bool
 291 MacroExpander::fails_cfg (const AST::AttrVec &attrs) const
 292 {
 293   for (const auto &attr : attrs)
 294     {
 295       if (attr.get_path () == "cfg" && !attr.check_cfg_predicate (session))
 296         return true;
 297     }
 298   return false;
 299 }
 300
 301 /* Determines whether any cfg predicate is false and hence item with attributes
 302  * should be stripped. Will expand attributes as well. */
 303 bool
 304 MacroExpander::fails_cfg_with_expand (AST::AttrVec &attrs) const
 305 {
 306   // TODO: maybe have something that strips cfg attributes that evaluate true?
 307   for (auto &attr : attrs)
 308     {
 309       if (attr.get_path () == "cfg")
 310         {
 311           if (!attr.is_parsed_to_meta_item ())
 312             attr.parse_attr_to_meta_item ();
 313
 314           // DEBUG
 315           if (!attr.is_parsed_to_meta_item ())
 316             rust_debug ("failed to parse attr to meta item, right before "
 317                         "cfg predicate check");
 318           else
 319             rust_debug ("attr has been successfully parsed to meta item, "
 320                         "right before cfg predicate check");
 321
 322           if (!attr.check_cfg_predicate (session))
 323             {
 324               // DEBUG
 325               rust_debug (
 326                 "cfg predicate failed for attribute: \033[0;31m'%s'\033[0m",
 327                 attr.as_string ().c_str ());
 328
 329               return true;
 330             }
 331           else
 332             {
 333               // DEBUG
 334               rust_debug ("cfg predicate succeeded for attribute: "
 335                           "\033[0;31m'%s'\033[0m",
 336                           attr.as_string ().c_str ());
 337             }
 338         }
 339     }
 340   return false;
 341 }
 342
 343 // Expands cfg_attr attributes.
 344 void
 345 MacroExpander::expand_cfg_attrs (AST::AttrVec &attrs)
 346 {
 347   for (std::size_t i = 0; i < attrs.size (); i++)
 348     {
 349       auto &attr = attrs[i];
 350       if (attr.get_path () == "cfg_attr")
 351         {
 352           if (!attr.is_parsed_to_meta_item ())
 353             attr.parse_attr_to_meta_item ();
 354
 355           if (attr.check_cfg_predicate (session))
 356             {
 357               // split off cfg_attr
 358               AST::AttrVec new_attrs = attr.separate_cfg_attrs ();
 359
 360               // remove attr from vector
 361               attrs.erase (attrs.begin () + i);
 362
 363               // add new attrs to vector
 364               attrs.insert (attrs.begin () + i,
 365                             std::make_move_iterator (new_attrs.begin ()),
 366                             std::make_move_iterator (new_attrs.end ()));
 367             }
 368
 369           /* do something - if feature (first token in tree) is in fact enabled,
 370            * make tokens listed afterwards into attributes. i.e.: for
 371            * [cfg_attr(feature = "wow", wow1, wow2)], if "wow" is true, then add
 372            * attributes [wow1] and [wow2] to attribute list. This can also be
 373            * recursive, so check for expanded attributes being recursive and
 374            * possibly recursively call the expand_attrs? */
 375         }
 376       else
 377         {
 378           i++;
 379         }
 380     }
 381   attrs.shrink_to_fit ();
 382 }
 383
 384 void
 385 MacroExpander::expand_crate ()
 386 {
 387   NodeId scope_node_id = crate.get_node_id ();
 388   resolver->get_macro_scope ().push (scope_node_id);
 389
 390   /* fill macro/decorator map from init list? not sure where init list comes
 391    * from? */
 392
 393   // TODO: does cfg apply for inner attributes? research.
 394   // the apparent answer (from playground test) is yes
 395
 396   // expand crate cfg_attr attributes
 397   expand_cfg_attrs (crate.inner_attrs);
 398
 399   if (fails_cfg_with_expand (crate.inner_attrs))
 400     {
 401       // basically, delete whole crate
 402       crate.strip_crate ();
 403       // TODO: maybe create warning here? probably not desired behaviour
 404     }
 405   // expand module attributes?
 406
 407   push_context (ITEM);
 408
 409   // expand attributes recursively and strip items if required
 410   AttrVisitor attr_visitor (*this);
 411   auto &items = crate.items;
 412   for (auto it = items.begin (); it != items.end ();)
 413     {
 414       auto &item = *it;
 415
 416       // mark for stripping if required
 417       item->accept_vis (attr_visitor);
 418
 419       auto fragment = take_expanded_fragment ();
 420       if (fragment.should_expand ())
 421         {
 422           // Remove the current expanded invocation
 423           it = items.erase (it);
 424           for (auto &node : fragment.get_nodes ())
 425             {
 426               it = items.insert (it, node.take_item ());
 427               it++;
 428             }
 429         }
 430       else if (item->is_marked_for_strip ())
 431         it = items.erase (it);
 432       else
 433         it++;
 434     }
 435
 436   pop_context ();
 437
 438   // TODO: should recursive attribute and macro expansion be done in the same
 439   // transversal? Or in separate ones like currently?
 440
 441   // expand module tree recursively
 442
 443   // post-process
 444
 445   // extract exported macros?
 446 }
 447
 448 bool
 449 MacroExpander::depth_exceeds_recursion_limit () const
 450 {
 451   return expansion_depth >= cfg.recursion_limit;
 452 }
 453
 454 bool
 455 MacroExpander::try_match_rule (AST::MacroRule &match_rule,
 456                                AST::DelimTokenTree &invoc_token_tree)
 457 {
 458   MacroInvocLexer lex (invoc_token_tree.to_token_stream ());
 459   Parser<MacroInvocLexer> parser (lex);
 460
 461   AST::MacroMatcher &matcher = match_rule.get_matcher ();
 462
 463   expansion_depth++;
 464   if (!match_matcher (parser, matcher))
 465     {
 466       expansion_depth--;
 467       return false;
 468     }
 469   expansion_depth--;
 470
 471   bool used_all_input_tokens = parser.skip_token (END_OF_FILE);
 472   return used_all_input_tokens;
 473 }
 474
 475 bool
 476 MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
 477                                AST::MacroMatchFragment &fragment)
 478 {
 479   switch (fragment.get_frag_spec ().get_kind ())
 480     {
 481     case AST::MacroFragSpec::EXPR:
 482       parser.parse_expr ();
 483       break;
 484
 485     case AST::MacroFragSpec::BLOCK:
 486       parser.parse_block_expr ();
 487       break;
 488
 489     case AST::MacroFragSpec::IDENT:
 490       parser.parse_identifier_pattern ();
 491       break;
 492
 493     case AST::MacroFragSpec::LITERAL:
 494       parser.parse_literal_expr ();
 495       break;
 496
 497     case AST::MacroFragSpec::ITEM:
 498       parser.parse_item (false);
 499       break;
 500
 501     case AST::MacroFragSpec::TY:
 502       parser.parse_type ();
 503       break;
 504
 505     case AST::MacroFragSpec::PAT:
 506       parser.parse_pattern ();
 507       break;
 508
 509     case AST::MacroFragSpec::PATH:
 510       parser.parse_path_in_expression ();
 511       break;
 512
 513     case AST::MacroFragSpec::VIS:
 514       parser.parse_visibility ();
 515       break;
 516
 517       case AST::MacroFragSpec::STMT: {
 518         auto restrictions = ParseRestrictions ();
 519         restrictions.consume_semi = false;
 520         parser.parse_stmt (restrictions);
 521         break;
 522       }
 523
 524     case AST::MacroFragSpec::LIFETIME:
 525       parser.parse_lifetime_params ();
 526       break;
 527
 528       // is meta attributes?
 529     case AST::MacroFragSpec::META:
 530       parser.parse_attribute_body ();
 531       break;
 532
 533     case AST::MacroFragSpec::TT:
 534       parser.parse_token_tree ();
 535       break;
 536
 537       // i guess we just ignore invalid and just error out
 538     case AST::MacroFragSpec::INVALID:
 539       return false;
 540     }
 541
 542   // it matches if the parser did not produce errors trying to parse that type
 543   // of item
 544   return !parser.has_errors ();
 545 }
 546
 547 bool
 548 MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser,
 549                               AST::MacroMatcher &matcher, bool in_repetition)
 550 {
 551   if (depth_exceeds_recursion_limit ())
 552     {
 553       rust_error_at (matcher.get_match_locus (), "reached recursion limit");
 554       return false;
 555     }
 556
 557   auto delimiter = parser.peek_current_token ();
 558
 559   // this is used so we can check that we delimit the stream correctly.
 560   switch (delimiter->get_id ())
 561     {
 562       case LEFT_PAREN: {
 563         if (!parser.skip_token (LEFT_PAREN))
 564           return false;
 565       }
 566       break;
 567
 568       case LEFT_SQUARE: {
 569         if (!parser.skip_token (LEFT_SQUARE))
 570           return false;
 571       }
 572       break;
 573
 574       case LEFT_CURLY: {
 575         if (!parser.skip_token (LEFT_CURLY))
 576           return false;
 577       }
 578       break;
 579     default:
 580       gcc_unreachable ();
 581     }
 582
 583   const MacroInvocLexer &source = parser.get_token_source ();
 584
 585   for (auto &match : matcher.get_matches ())
 586     {
 587       size_t offs_begin = source.get_offs ();
 588
 589       switch (match->get_macro_match_type ())
 590         {
 591           case AST::MacroMatch::MacroMatchType::Fragment: {
 592             AST::MacroMatchFragment *fragment
 593               = static_cast<AST::MacroMatchFragment *> (match.get ());
 594             if (!match_fragment (parser, *fragment))
 595               return false;
 596
 597             // matched fragment get the offset in the token stream
 598             size_t offs_end = source.get_offs ();
 599             if (in_repetition)
 600               sub_stack.append_fragment (
 601                 MatchedFragment (fragment->get_ident (), offs_begin, offs_end));
 602             else
 603               sub_stack.insert_metavar (
 604                 MatchedFragment (fragment->get_ident (), offs_begin, offs_end));
 605           }
 606           break;
 607
 608           case AST::MacroMatch::MacroMatchType::Tok: {
 609             AST::Token *tok = static_cast<AST::Token *> (match.get ());
 610             if (!match_token (parser, *tok))
 611               return false;
 612           }
 613           break;
 614
 615           case AST::MacroMatch::MacroMatchType::Repetition: {
 616             AST::MacroMatchRepetition *rep
 617               = static_cast<AST::MacroMatchRepetition *> (match.get ());
 618             if (!match_repetition (parser, *rep))
 619               return false;
 620           }
 621           break;
 622
 623           case AST::MacroMatch::MacroMatchType::Matcher: {
 624             AST::MacroMatcher *m
 625               = static_cast<AST::MacroMatcher *> (match.get ());
 626             expansion_depth++;
 627             if (!match_matcher (parser, *m, in_repetition))
 628               {
 629                 expansion_depth--;
 630                 return false;
 631               }
 632             expansion_depth--;
 633           }
 634           break;
 635         }
 636     }
 637
 638   switch (delimiter->get_id ())
 639     {
 640       case LEFT_PAREN: {
 641         if (!parser.skip_token (RIGHT_PAREN))
 642           return false;
 643       }
 644       break;
 645
 646       case LEFT_SQUARE: {
 647         if (!parser.skip_token (RIGHT_SQUARE))
 648           return false;
 649       }
 650       break;
 651
 652       case LEFT_CURLY: {
 653         if (!parser.skip_token (RIGHT_CURLY))
 654           return false;
 655       }
 656       break;
 657     default:
 658       gcc_unreachable ();
 659     }
 660
 661   return true;
 662 }
 663
 664 bool
 665 MacroExpander::match_token (Parser<MacroInvocLexer> &parser, AST::Token &token)
 666 {
 667   // FIXME this needs to actually match the content and the type
 668   return parser.skip_token (token.get_id ());
 669 }
 670
 671 bool
 672 MacroExpander::match_n_matches (Parser<MacroInvocLexer> &parser,
 673                                 AST::MacroMatchRepetition &rep,
 674                                 size_t &match_amount, size_t lo_bound,
 675                                 size_t hi_bound)
 676 {
 677   match_amount = 0;
 678   auto &matches = rep.get_matches ();
 679
 680   const MacroInvocLexer &source = parser.get_token_source ();
 681   while (true)
 682     {
 683       // If the current token is a closing macro delimiter, break away.
 684       // TODO: Is this correct?
 685       auto t_id = parser.peek_current_token ()->get_id ();
 686       if (t_id == RIGHT_PAREN || t_id == RIGHT_SQUARE || t_id == RIGHT_CURLY)
 687         break;
 688
 689       // Skip parsing a separator on the first match, otherwise consume it.
 690       // If it isn't present, this is an error
 691       if (rep.has_sep () && match_amount > 0)
 692         if (!match_token (parser, *rep.get_sep ()))
 693           break;
 694
 695       bool valid_current_match = false;
 696       for (auto &match : matches)
 697         {
 698           size_t offs_begin = source.get_offs ();
 699           switch (match->get_macro_match_type ())
 700             {
 701               case AST::MacroMatch::MacroMatchType::Fragment: {
 702                 AST::MacroMatchFragment *fragment
 703                   = static_cast<AST::MacroMatchFragment *> (match.get ());
 704                 valid_current_match = match_fragment (parser, *fragment);
 705
 706                 // matched fragment get the offset in the token stream
 707                 size_t offs_end = source.get_offs ();
 708
 709                 // The main difference with match_matcher happens here: Instead
 710                 // of inserting a new fragment, we append to one. If that
 711                 // fragment does not exist, then the operation is similar to
 712                 // `insert_fragment` with the difference that we are not
 713                 // creating a metavariable, but a repetition of one, which is
 714                 // really different.
 715                 sub_stack.append_fragment (
 716                   MatchedFragment (fragment->get_ident (), offs_begin,
 717                                    offs_end));
 718               }
 719               break;
 720
 721               case AST::MacroMatch::MacroMatchType::Tok: {
 722                 AST::Token *tok = static_cast<AST::Token *> (match.get ());
 723                 valid_current_match = match_token (parser, *tok);
 724               }
 725               break;
 726
 727               case AST::MacroMatch::MacroMatchType::Repetition: {
 728                 AST::MacroMatchRepetition *rep
 729                   = static_cast<AST::MacroMatchRepetition *> (match.get ());
 730                 valid_current_match = match_repetition (parser, *rep);
 731               }
 732               break;
 733
 734               case AST::MacroMatch::MacroMatchType::Matcher: {
 735                 AST::MacroMatcher *m
 736                   = static_cast<AST::MacroMatcher *> (match.get ());
 737                 valid_current_match = match_matcher (parser, *m, true);
 738               }
 739               break;
 740             }
 741         }
 742       // If we've encountered an error once, stop trying to match more
 743       // repetitions
 744       if (!valid_current_match)
 745         break;
 746
 747       match_amount++;
 748
 749       // Break early if we notice there's too many expressions already
 750       if (hi_bound && match_amount > hi_bound)
 751         break;
 752     }
 753
 754   // Check if the amount of matches we got is valid: Is it more than the lower
 755   // bound and less than the higher bound?
 756   bool did_meet_lo_bound = match_amount >= lo_bound;
 757   bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true;
 758
 759   // If the end-result is valid, then we can clear the parse errors: Since
 760   // repetitions are parsed eagerly, it is okay to fail in some cases
 761   auto res = did_meet_lo_bound && did_meet_hi_bound;
 762   if (res)
 763     parser.clear_errors ();
 764
 765   return res;
 766 }
 767
 768 bool
 769 MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser,
 770                                  AST::MacroMatchRepetition &rep)
 771 {
 772   size_t match_amount = 0;
 773   bool res = false;
 774
 775   std::string lo_str;
 776   std::string hi_str;
 777   switch (rep.get_op ())
 778     {
 779     case AST::MacroMatchRepetition::MacroRepOp::ANY:
 780       lo_str = "0";
 781       hi_str = "+inf";
 782       res = match_n_matches (parser, rep, match_amount);
 783       break;
 784     case AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE:
 785       lo_str = "1";
 786       hi_str = "+inf";
 787       res = match_n_matches (parser, rep, match_amount, 1);
 788       break;
 789     case AST::MacroMatchRepetition::MacroRepOp::ZERO_OR_ONE:
 790       lo_str = "0";
 791       hi_str = "1";
 792       res = match_n_matches (parser, rep, match_amount, 0, 1);
 793       break;
 794     default:
 795       gcc_unreachable ();
 796     }
 797
 798   if (!res)
 799     rust_error_at (rep.get_match_locus (),
 800                    "invalid amount of matches for macro invocation. Expected "
 801                    "between %s and %s, got %lu",
 802                    lo_str.c_str (), hi_str.c_str (),
 803                    (unsigned long) match_amount);
 804
 805   rust_debug_loc (rep.get_match_locus (), "%s matched %lu times",
 806                   res ? "successfully" : "unsuccessfully",
 807                   (unsigned long) match_amount);
 808
 809   // We have to handle zero fragments differently: They will not have been
 810   // "matched" but they are still valid and should be inserted as a special
 811   // case. So we go through the stack map, and for every fragment which doesn't
 812   // exist, insert a zero-matched fragment.
 813   auto &stack_map = sub_stack.peek ();
 814   for (auto &match : rep.get_matches ())
 815     {
 816       if (match->get_macro_match_type ()
 817           == AST::MacroMatch::MacroMatchType::Fragment)
 818         {
 819           auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ());
 820           auto it = stack_map.find (fragment->get_ident ());
 821
 822           if (it == stack_map.end ())
 823             sub_stack.insert_matches (fragment->get_ident (),
 824                                       MatchedFragmentContainer::zero ());
 825         }
 826     }
 827
 828   return res;
 829 }
 830
 831 /**
 832  * Helper function to refactor calling a parsing function 0 or more times
 833  */
 834 static AST::Fragment
 835 parse_many (Parser<MacroInvocLexer> &parser, TokenId &delimiter,
 836             std::function<AST::SingleASTNode ()> parse_fn)
 837 {
 838   auto &lexer = parser.get_token_source ();
 839   auto start = lexer.get_offs ();
 840
 841   std::vector<AST::SingleASTNode> nodes;
 842   while (true)
 843     {
 844       if (parser.peek_current_token ()->get_id () == delimiter)
 845         break;
 846
 847       auto node = parse_fn ();
 848       if (node.is_error ())
 849         {
 850           for (auto err : parser.get_errors ())
 851             err.emit ();
 852
 853           return AST::Fragment::create_error ();
 854         }
 855
 856       nodes.emplace_back (std::move (node));
 857     }
 858   auto end = lexer.get_offs ();
 859
 860   return AST::Fragment (std::move (nodes), lexer.get_token_slice (start, end));
 861 }
 862
 863 /**
 864  * Transcribe 0 or more items from a macro invocation
 865  *
 866  * @param parser Parser to extract items from
 867  * @param delimiter Id of the token on which parsing should stop
 868  */
 869 static AST::Fragment
 870 transcribe_many_items (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 871 {
 872   return parse_many (parser, delimiter, [&parser] () {
 873     auto item = parser.parse_item (true);
 874     return AST::SingleASTNode (std::move (item));
 875   });
 876 }
 877
 878 /**
 879  * Transcribe 0 or more external items from a macro invocation
 880  *
 881  * @param parser Parser to extract items from
 882  * @param delimiter Id of the token on which parsing should stop
 883  */
 884 static AST::Fragment
 885 transcribe_many_ext (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 886 {
 887   return parse_many (parser, delimiter, [&parser] () {
 888     auto item = parser.parse_external_item ();
 889     return AST::SingleASTNode (std::move (item));
 890   });
 891 }
 892
 893 /**
 894  * Transcribe 0 or more trait items from a macro invocation
 895  *
 896  * @param parser Parser to extract items from
 897  * @param delimiter Id of the token on which parsing should stop
 898  */
 899 static AST::Fragment
 900 transcribe_many_trait_items (Parser<MacroInvocLexer> &parser,
 901                              TokenId &delimiter)
 902 {
 903   return parse_many (parser, delimiter, [&parser] () {
 904     auto item = parser.parse_trait_item ();
 905     return AST::SingleASTNode (std::move (item));
 906   });
 907 }
 908
 909 /**
 910  * Transcribe 0 or more impl items from a macro invocation
 911  *
 912  * @param parser Parser to extract items from
 913  * @param delimiter Id of the token on which parsing should stop
 914  */
 915 static AST::Fragment
 916 transcribe_many_impl_items (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 917 {
 918   return parse_many (parser, delimiter, [&parser] () {
 919     auto item = parser.parse_inherent_impl_item ();
 920     return AST::SingleASTNode (std::move (item));
 921   });
 922 }
 923
 924 /**
 925  * Transcribe 0 or more trait impl items from a macro invocation
 926  *
 927  * @param parser Parser to extract items from
 928  * @param delimiter Id of the token on which parsing should stop
 929  */
 930 static AST::Fragment
 931 transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser,
 932                                   TokenId &delimiter)
 933 {
 934   return parse_many (parser, delimiter, [&parser] () {
 935     auto item = parser.parse_trait_impl_item ();
 936     return AST::SingleASTNode (std::move (item));
 937   });
 938 }
 939
 940 /**
 941  * Transcribe 0 or more statements from a macro invocation
 942  *
 943  * @param parser Parser to extract statements from
 944  * @param delimiter Id of the token on which parsing should stop
 945  */
 946 static AST::Fragment
 947 transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
 948 {
 949   auto restrictions = ParseRestrictions ();
 950   restrictions.consume_semi = false;
 951
 952   // FIXME: This is invalid! It needs to also handle cases where the macro
 953   // transcriber is an expression, but since the macro call is followed by
 954   // a semicolon, it's a valid ExprStmt
 955   return parse_many (parser, delimiter, [&parser, restrictions] () {
 956     auto stmt = parser.parse_stmt (restrictions);
 957     return AST::SingleASTNode (std::move (stmt));
 958   });
 959 }
 960
 961 /**
 962  * Transcribe one expression from a macro invocation
 963  *
 964  * @param parser Parser to extract statements from
 965  */
 966 static AST::Fragment
 967 transcribe_expression (Parser<MacroInvocLexer> &parser)
 968 {
 969   auto &lexer = parser.get_token_source ();
 970   auto start = lexer.get_offs ();
 971
 972   auto expr = parser.parse_expr ();
 973   if (expr == nullptr)
 974     return AST::Fragment::create_error ();
 975
 976   auto end = lexer.get_offs ();
 977
 978   return AST::Fragment ({std::move (expr)}, lexer.get_token_slice (start, end));
 979 }
 980
 981 /**
 982  * Transcribe one type from a macro invocation
 983  *
 984  * @param parser Parser to extract statements from
 985  */
 986 static AST::Fragment
 987 transcribe_type (Parser<MacroInvocLexer> &parser)
 988 {
 989   auto &lexer = parser.get_token_source ();
 990   auto start = lexer.get_offs ();
 991
 992   auto type = parser.parse_type (true);
 993   for (auto err : parser.get_errors ())
 994     err.emit ();
 995
 996   auto end = lexer.get_offs ();
 997
 998   return AST::Fragment ({std::move (type)}, lexer.get_token_slice (start, end));
 999 }
1000
1001 static AST::Fragment
1002 transcribe_on_delimiter (Parser<MacroInvocLexer> &parser, bool semicolon,
1003                          AST::DelimType delimiter, TokenId last_token_id)
1004 {
1005   if (semicolon || delimiter == AST::DelimType::CURLY)
1006     return transcribe_many_stmts (parser, last_token_id);
1007   else
1008     return transcribe_expression (parser);
1009 } // namespace Rust
1010
1011 static AST::Fragment
1012 transcribe_context (MacroExpander::ContextType ctx,
1013                     Parser<MacroInvocLexer> &parser, bool semicolon,
1014                     AST::DelimType delimiter, TokenId last_token_id)
1015 {
1016   // The flow-chart in order to choose a parsing function is as follows:
1017   //
1018   // [switch special context]
1019   //     -- Item --> parser.parse_item();
1020   //     -- Trait --> parser.parse_trait_item();
1021   //     -- Impl --> parser.parse_impl_item();
1022   //     -- Extern --> parser.parse_extern_item();
1023   //     -- None --> [has semicolon?]
1024   //                 -- Yes --> parser.parse_stmt();
1025   //                 -- No --> [switch invocation.delimiter()]
1026   //                             -- { } --> parser.parse_stmt();
1027   //                             -- _ --> parser.parse_expr(); // once!
1028
1029   // If there is a semicolon OR we are expanding a MacroInvocationSemi, then
1030   // we can parse multiple items. Otherwise, parse *one* expression
1031
1032   switch (ctx)
1033     {
1034     case MacroExpander::ContextType::ITEM:
1035       return transcribe_many_items (parser, last_token_id);
1036       break;
1037     case MacroExpander::ContextType::TRAIT:
1038       return transcribe_many_trait_items (parser, last_token_id);
1039       break;
1040     case MacroExpander::ContextType::IMPL:
1041       return transcribe_many_impl_items (parser, last_token_id);
1042       break;
1043     case MacroExpander::ContextType::TRAIT_IMPL:
1044       return transcribe_many_trait_impl_items (parser, last_token_id);
1045       break;
1046     case MacroExpander::ContextType::EXTERN:
1047       return transcribe_many_ext (parser, last_token_id);
1048       break;
1049     case MacroExpander::ContextType::TYPE:
1050       return transcribe_type (parser);
1051       break;
1052     default:
1053       return transcribe_on_delimiter (parser, semicolon, delimiter,
1054                                       last_token_id);
1055     }
1056 }
1057
1058 static std::string
1059 tokens_to_str (std::vector<std::unique_ptr<AST::Token>> &tokens)
1060 {
1061   std::string str;
1062   if (!tokens.empty ())
1063     {
1064       str += tokens[0]->as_string ();
1065       for (size_t i = 1; i < tokens.size (); i++)
1066         str += " " + tokens[i]->as_string ();
1067     }
1068
1069   return str;
1070 }
1071
1072 AST::Fragment
1073 MacroExpander::transcribe_rule (
1074   AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree,
1075   std::map<std::string, MatchedFragmentContainer> &matched_fragments,
1076   bool semicolon, ContextType ctx)
1077 {
1078   // we can manipulate the token tree to substitute the dollar identifiers so
1079   // that when we call parse its already substituted for us
1080   AST::MacroTranscriber &transcriber = match_rule.get_transcriber ();
1081   AST::DelimTokenTree &transcribe_tree = transcriber.get_token_tree ();
1082
1083   auto invoc_stream = invoc_token_tree.to_token_stream ();
1084   auto macro_rule_tokens = transcribe_tree.to_token_stream ();
1085
1086   auto substitute_context
1087     = SubstituteCtx (invoc_stream, macro_rule_tokens, matched_fragments);
1088   std::vector<std::unique_ptr<AST::Token>> substituted_tokens
1089     = substitute_context.substitute_tokens ();
1090
1091   rust_debug ("substituted tokens: %s",
1092               tokens_to_str (substituted_tokens).c_str ());
1093
1094   // parse it to an Fragment
1095   MacroInvocLexer lex (std::move (substituted_tokens));
1096   Parser<MacroInvocLexer> parser (lex);
1097
1098   auto last_token_id = TokenId::RIGHT_CURLY;
1099
1100   // this is used so we can check that we delimit the stream correctly.
1101   switch (transcribe_tree.get_delim_type ())
1102     {
1103     case AST::DelimType::PARENS:
1104       last_token_id = TokenId::RIGHT_PAREN;
1105       rust_assert (parser.skip_token (LEFT_PAREN));
1106       break;
1107
1108     case AST::DelimType::CURLY:
1109       rust_assert (parser.skip_token (LEFT_CURLY));
1110       break;
1111
1112     case AST::DelimType::SQUARE:
1113       last_token_id = TokenId::RIGHT_SQUARE;
1114       rust_assert (parser.skip_token (LEFT_SQUARE));
1115       break;
1116     }
1117
1118   // see https://github.com/Rust-GCC/gccrs/issues/22
1119   // TL;DR:
1120   //   - Treat all macro invocations with parentheses, (), or square brackets,
1121   //   [], as expressions.
1122   //   - If the macro invocation has curly brackets, {}, it may be parsed as a
1123   //   statement depending on the context.
1124   //   - If the macro invocation has a semicolon at the end, it must be parsed
1125   //   as a statement (either via ExpressionStatement or
1126   //   MacroInvocationWithSemi)
1127
1128   auto fragment
1129     = transcribe_context (ctx, parser, semicolon,
1130                           invoc_token_tree.get_delim_type (), last_token_id);
1131
1132   // emit any errors
1133   if (parser.has_errors ())
1134     {
1135       for (auto &err : parser.get_errors ())
1136         rust_error_at (err.locus, "%s", err.message.c_str ());
1137       return AST::Fragment::create_error ();
1138     }
1139
1140   // are all the tokens used?
1141   bool did_delimit = parser.skip_token (last_token_id);
1142
1143   bool reached_end_of_stream = did_delimit && parser.skip_token (END_OF_FILE);
1144   if (!reached_end_of_stream)
1145     {
1146       const_TokenPtr current_token = parser.peek_current_token ();
1147       rust_error_at (current_token->get_locus (),
1148                      "tokens here and after are unparsed");
1149     }
1150
1151   return fragment;
1152 }
1153 } // namespace Rust