src/perl6/STD.pm6

   1 # STD.pm
   2 #
   3 # Copyright 2007-2010, Larry Wall
   4 #
   5 # You may copy this software under the terms of the Artistic License,
   6 #     version 2.0 or later.
   7
   8 grammar STD:ver<6.0.0.alpha>:auth<http://perl.org>;
   9
  10 use DEBUG;
  11 use NAME;
  12 use Stash;
  13 use Cursor;
  14
  15 our $ALL;
  16
  17 =begin comment
  18
  19     Contextuals used in STD
  20     =======================
  21     # per parse
  22     my $*ACTIONS;         # class or object which defines reduce actions
  23     my $*SETTINGNAME;     # name of core setting
  24     my $*TMP_PREFIX;      # where to put tmp files
  25     my $*ORIG;            # the original program string
  26     my @*ORIG;            # same thing as individual chars
  27     my @*MEMOS;           # per-position info such as ws and line number
  28     my $*HIGHWATER;      # where we were last looking for things
  29     my $*HIGHMESS;       # current parse failure message
  30     my $*HIGHEXPECT;     # things we were looking for at the bleeding edge
  31     my $*IN_PANIC;       # don't panic recursively
  32
  33     # symbol table management
  34     our $ALL;            # all the stashes, keyed by id
  35     my $*CORE;            # the CORE scope
  36     my $*SETTING;         # the SETTING scope
  37     my $*GLOBAL;          # the GLOBAL scope
  38     my $*PROCESS;         # the PROCESS scope
  39     my $*UNIT;            # the UNIT scope
  40     my $*CURLEX;          # current lexical scope info
  41     my $*CURPKG;          # current package scope
  42
  43     my %*MYSTERY;     # names we assume may be post-declared functions
  44
  45     # tree attributes, marked as propagating up (u) down (d) or up-and-down (u/d)
  46     my %*LANG;            # (d) braided languages: MAIN, Q, Regex, etc
  47
  48     my $*IN_DECL;     # (d) a declarator is looking for a name to declare
  49     my $*SCOPE = "";      # (d) which scope declarator we're under
  50     my $*MULTINESS;       # (d) which multi declarator we're under
  51     my $*PKGDECL ::= "";         # (d) current package declarator
  52     my $*NEWPKG;      # (u/d) new package being declared
  53     my $*NEWLEX;      # (u/d) new lex info being declared
  54     my $*DECLARAND;   # (u/d) new object associated with declaration
  55
  56     my $*GOAL ::= "(eof)";  # (d) which special terminator we're most wanting
  57     my $*IN_REDUCE;   # (d) attempting to parse an [op] construct
  58     my $*IN_META;     # (d) parsing a metaoperator like [..]
  59     my $*QUASIMODO;   # (d) don't carp about quasi variables
  60     my $*LEFTSIGIL;   # (u) sigil of LHS for item vs list assignment
  61     my $*QSIGIL;      # (d) sigil of current interpolation
  62
  63     my $*INVOCANT_OK; # (d) parsing a list that allows an invocant
  64     my $*INVOCANT_IS; # (u) invocant of args match
  65
  66     my $*BORG;            # (u/d) who to blame if we're missing a block
  67
  68 =end comment
  69
  70 =begin notes
  71
  72     Some rules are named by syntactic category plus an additional symbol
  73     specified in adverbial form, either in bare :name form or in :sym<name>
  74     form.  (It does not matter which form you use for identifier symbols,
  75     except that to specify a symbol "sym" you must use the :sym<sym> form
  76     of adverb.)  If you use the <sym> rule within the rule, it will parse the
  77     symbol at that point.  At the final reduction point of a rule, if $sym
  78     has been set, that is used as the final symbol name for the rule.  This
  79     need not match the symbol specified as part the rule name; that is just
  80     for disambiguating the name.  However, if no $sym is set, the original
  81     symbol will be used by default.
  82
  83     Note that some of these rules are written strangely because we're
  84     still bootstrapping via a preprocessor, gimme5.  For instance,
  85     blocks that contain nested braces are delimited by double braces
  86     so that the preprocessor does not need to parse Perl 6 code.
  87
  88     This grammar relies on transitive longest-token semantics, though
  89     initially we made a feeble attempt to order rules so a procedural
  90     interpretation of alternation could usually produce a correct parse.
  91     (This is becoming less true over time.)
  92
  93 =end notes
  94
  95 method p6class () { ::STD::P6 }
  96
  97 method TOP ($STOP = '') {
  98     my $lang = self.cursor_fresh( self.p6class );
  99
 100     if $STOP {
 101         my $*GOAL ::= $STOP;
 102         $lang.unitstop($STOP).comp_unit;
 103     }
 104     else {
 105         $lang.comp_unit;
 106     }
 107 }
 108
 109 ##############
 110 # Precedence #
 111 ##############
 112
 113 # The internal precedence levels are *not* part of the public interface.
 114 # The current values are mere implementation; they may change at any time.
 115 # Users should specify precedence only in relation to existing levels.
 116
 117 constant %term            = (:dba('term')            , :prec<z=>);
 118 constant %methodcall      = (:dba('methodcall')      , :prec<y=>, :assoc<unary>, :uassoc<left>, :fiddly, :!pure);
 119 constant %autoincrement   = (:dba('autoincrement')   , :prec<x=>, :assoc<unary>, :uassoc<non>, :!pure);
 120 constant %exponentiation  = (:dba('exponentiation')  , :prec<w=>, :assoc<right>, :pure);
 121 constant %symbolic_unary  = (:dba('symbolic unary')  , :prec<v=>, :assoc<unary>, :uassoc<left>, :pure);
 122 constant %multiplicative  = (:dba('multiplicative')  , :prec<u=>, :assoc<left>, :pure);
 123 constant %additive        = (:dba('additive')        , :prec<t=>, :assoc<left>, :pure);
 124 constant %replication     = (:dba('replication')     , :prec<s=>, :assoc<left>, :pure);
 125 constant %concatenation   = (:dba('concatenation')   , :prec<r=>, :assoc<list>, :pure);
 126 constant %junctive_and    = (:dba('junctive and')    , :prec<q=>, :assoc<list>, :pure);
 127 constant %junctive_or     = (:dba('junctive or')     , :prec<p=>, :assoc<list>, :pure);
 128 constant %named_unary     = (:dba('named unary')     , :prec<o=>, :assoc<unary>, :uassoc<left>, :pure);
 129 constant %structural      = (:dba('structural infix'), :prec<n=>, :assoc<non>, :diffy);
 130 constant %chaining        = (:dba('chaining')        , :prec<m=>, :assoc<chain>, :diffy, :iffy, :pure);
 131 constant %tight_and       = (:dba('tight and')       , :prec<l=>, :assoc<list>);
 132 constant %tight_or        = (:dba('tight or')        , :prec<k=>, :assoc<list>);
 133 constant %conditional     = (:dba('conditional')     , :prec<j=>, :assoc<right>, :fiddly);
 134 constant %item_assignment = (:dba('item assignment') , :prec<i=>, :assoc<right>, :!pure);
 135 constant %list_assignment = (:dba('list assignment') , :prec<i=>, :assoc<right>, :sub<e=>, :fiddly, :!pure);
 136 constant %loose_unary     = (:dba('loose unary')     , :prec<h=>, :assoc<unary>, :uassoc<left>, :pure);
 137 constant %comma           = (:dba('comma')           , :prec<g=>, :assoc<list>, :nextterm<nulltermish>, :fiddly, :pure);
 138 constant %list_infix      = (:dba('list infix')      , :prec<f=>, :assoc<list>, :pure);
 139 constant %list_prefix     = (:dba('list prefix')     , :prec<e=>, :assoc<unary>, :uassoc<left>);
 140 constant %loose_and       = (:dba('loose and')       , :prec<d=>, :assoc<list>);
 141 constant %loose_or        = (:dba('loose or')        , :prec<c=>, :assoc<list>);
 142 constant %sequencer       = (:dba('sequencer')       , :prec<b=>, :assoc<list>, :nextterm<statement>, :fiddly);
 143 constant %LOOSEST         = (:dba('LOOSEST')         , :prec<a=!>);
 144 constant %terminator      = (:dba('terminator')      , :prec<a=>, :assoc<list>);
 145
 146 # "epsilon" tighter than terminator
 147 #constant $LOOSEST = %LOOSEST<prec>;
 148 constant $LOOSEST = "a=!"; # XXX preceding line is busted
 149 constant $item_assignment_prec = 'i=';
 150 constant $methodcall_prec = 'y=';
 151
 152 ##############
 153 # Categories #
 154 ##############
 155
 156 # Categories are designed to be easily extensible in derived grammars
 157 # by merely adding more rules in the same category.  The rules within
 158 # a given category start with the category name followed by a differentiating
 159 # adverbial qualifier to serve (along with the category) as the longer name.
 160
 161 # The endsym context, if specified, says what to implicitly check for in each
 162 # rule right after the initial <sym>.  Normally this is used to make sure
 163 # there's appropriate whitespace.  # Note that endsym isn't called if <sym>
 164 # isn't called.
 165
 166 my $*endsym = "null";
 167 my $*endargs = -1;
 168
 169 proto token category {*}
 170
 171 token category:category { <sym> }
 172
 173 token category:sigil { <sym> }
 174 proto token sigil {*}
 175
 176 token category:twigil { <sym> }
 177 proto token twigil (:$*endsym = 'begid') {*}
 178
 179 token category:special_variable { <sym> }
 180 proto token special_variable {*}
 181
 182 token category:comment { <sym> }
 183 proto token comment {*}
 184
 185 token category:version { <sym> }
 186 proto token version {*}
 187
 188 token category:module_name { <sym> }
 189 proto token module_name {*}
 190
 191 token category:value { <sym> }
 192 proto token value {*}
 193
 194 token category:term { <sym> }
 195 proto token term {*}
 196
 197 token category:strtonum { <sym> }
 198 proto token strtonum {*}
 199
 200 token category:quote { <sym> }
 201 proto token quote () {*}
 202
 203 token category:prefix { <sym> }
 204 proto token prefix is unary is defequiv(%symbolic_unary) {*}
 205
 206 token category:infix { <sym> }
 207 proto token infix is binary is defequiv(%additive) {*}
 208
 209 token category:postfix { <sym> }
 210 proto token postfix is unary is defequiv(%autoincrement) {*}
 211
 212 token category:dotty { <sym> }
 213 proto token dotty (:$*endsym = 'unspacey') {*}
 214
 215 token category:circumfix { <sym> }
 216 proto token circumfix {*}
 217
 218 token category:postcircumfix { <sym> }
 219 proto token postcircumfix is unary {*}  # unary as far as EXPR knows...
 220
 221 token category:quote_mod { <sym> }
 222 proto token quote_mod {*}
 223
 224 token category:trait_mod { <sym> }
 225 proto token trait_mod (:$*endsym = 'spacey') {*}
 226
 227 token category:type_declarator { <sym> }
 228 proto token type_declarator (:$*endsym = 'spacey') {*}
 229
 230 token category:scope_declarator { <sym> }
 231 proto token scope_declarator (:$*endsym = 'nofun') {*}
 232
 233 token category:package_declarator { <sym> }
 234 proto token package_declarator (:$*endsym = 'spacey') {*}
 235
 236 token category:multi_declarator { <sym> }
 237 proto token multi_declarator (:$*endsym = 'spacey') {*}
 238
 239 token category:routine_declarator { <sym> }
 240 proto token routine_declarator (:$*endsym = 'nofun') {*}
 241
 242 token category:regex_declarator { <sym> }
 243 proto token regex_declarator (:$*endsym = 'spacey') {*}
 244
 245 token category:statement_prefix { <sym> }
 246 proto rule  statement_prefix () {*}
 247
 248 token category:statement_control { <sym> }
 249 proto rule  statement_control (:$*endsym = 'spacey') {*}
 250
 251 token category:statement_mod_cond { <sym> }
 252 proto rule  statement_mod_cond (:$*endsym = 'nofun') {*}
 253
 254 token category:statement_mod_loop { <sym> }
 255 proto rule  statement_mod_loop (:$*endsym = 'nofun') {*}
 256
 257 token category:infix_prefix_meta_operator { <sym> }
 258 proto token infix_prefix_meta_operator is binary {*}
 259
 260 token category:infix_postfix_meta_operator { <sym> }
 261 proto token infix_postfix_meta_operator ($op) is binary {*}
 262
 263 token category:infix_circumfix_meta_operator { <sym> }
 264 proto token infix_circumfix_meta_operator is binary {*}
 265
 266 token category:postfix_prefix_meta_operator { <sym> }
 267 proto token postfix_prefix_meta_operator is unary {*}
 268
 269 token category:prefix_postfix_meta_operator { <sym> }
 270 proto token prefix_postfix_meta_operator is unary {*}
 271
 272 token category:prefix_circumfix_meta_operator { <sym> }
 273 proto token prefix_circumfix_meta_operator is unary {*}
 274
 275 token category:terminator { <sym> }
 276 proto token terminator {*}
 277
 278 token unspacey { <.unsp>? }
 279 token begid { <?before \w> }
 280 token endid { <?before <-[ \- \' \w ]> > }
 281 token spacey { <?before <[ \s \# ]> > }
 282 token nofun { <!before '(' | '.(' | '\\' | '\'' | '-' | "'" | \w > }
 283
 284 # Note, don't reduce on a bare sigil unless you don't want a twigil or
 285 # you otherwise don't care what the longest token is.
 286
 287 token sigil:sym<$>  { <sym> }
 288 token sigil:sym<@>  { <sym> }
 289 token sigil:sym<%>  { <sym> }
 290 token sigil:sym<&>  { <sym> }
 291
 292 token twigil:sym<.> { <sym> }
 293 token twigil:sym<!> { <sym> }
 294 token twigil:sym<^> { <sym> }
 295 token twigil:sym<:> { <sym> }
 296 token twigil:sym<*> { <sym> }
 297 token twigil:sym<?> { <sym> }
 298 token twigil:sym<=> { <sym> }
 299 token twigil:sym<~> { <sym> }
 300
 301 # overridden in subgrammars
 302 token stopper { <!> }
 303
 304 # hopefully we can include these tokens in any outer LTM matcher
 305 regex stdstopper {
 306     :temp $*STUB = return self if @*MEMOS[self.pos]<endstmt> :exists;
 307     :dba('standard stopper')
 308     [
 309     | <?terminator>
 310     | <?unitstopper>
 311     | <?before <stopper> >
 312     | $                                 # unlikely, check last (normal LTM behavior)
 313     ]
 314     { @*MEMOS[$¢.pos]<endstmt> ||= 1; }
 315 }
 316
 317 token longname {
 318     <name> {} [ <!before ':{'> <colonpair> ]*
 319 }
 320
 321 token name {
 322     [
 323     | <identifier> <morename>*
 324     | <morename>+
 325     ]
 326 }
 327
 328 token morename {
 329     :my $*QSIGIL ::= '';
 330     '::'
 331     [
 332     ||  <?before '(' | <alpha> >
 333         [
 334         | <identifier>
 335         | :dba('indirect name') '(' ~ ')' <EXPR>
 336         ]
 337     || <?before '::'> <.panic: "Name component may not be null">
 338     ]?
 339 }
 340
 341 ##############################
 342 # Quote primitives           #
 343 ##############################
 344
 345 # assumes whitespace is eaten already
 346
 347 method peek_delimiters {
 348     my $pos = self.pos;
 349     my $startpos = $pos;
 350     my $char = substr($*ORIG,$pos++,1);
 351     if $char ~~ /^\s$/ {
 352         self.panic("Whitespace character is not allowed as delimiter"); # "can't happen"
 353     }
 354     elsif $char ~~ /^\w$/ {
 355         self.panic("Alphanumeric character is not allowed as delimiter");
 356     }
 357     elsif %STD::close2open{$char} {
 358         self.panic("Use of a closing delimiter for an opener is reserved");
 359     }
 360     elsif $char eq ':' {
 361         self.panic("Colons may not be used to delimit quoting constructs");
 362     }
 363
 364     my $rightbrack = %STD::open2close{$char};
 365     if not defined $rightbrack {
 366         return $char, $char;
 367     }
 368     while substr($*ORIG,$pos,1) eq $char {
 369         $pos++;
 370     }
 371     my $len = $pos - $startpos;
 372     my $start = $char x $len;
 373     my $stop = $rightbrack x $len;
 374     return $start, $stop;
 375 }
 376
 377 role startstop[$start,$stop] {
 378     token starter { $start }
 379     token stopper { $stop }
 380 } # end role
 381
 382 role stop[$stop] {
 383     token starter { <!> }
 384     token stopper { $stop }
 385 } # end role
 386
 387 role unitstop[$stop] {
 388     token unitstopper { $stop }
 389 } # end role
 390
 391 token unitstopper { $ }
 392
 393 method balanced ($start,$stop) { self.mixin( ::startstop[$start,$stop] ); }
 394 method unbalanced ($stop) { self.mixin( ::stop[$stop] ); }
 395 method unitstop ($stop) { self.mixin( ::unitstop[$stop] ); }
 396
 397 method truly ($bool,$opt) {
 398     return self if $bool;
 399     self.sorry("Can't negate $opt adverb");
 400     self;
 401 }
 402
 403 token charname {
 404     [
 405     | <radint>
 406     | <alpha> .*? <?before \s*[ ',' | '#' | ']']>
 407     ] || <.sorry: "Unrecognized character name"> .*?<?terminator>
 408 }
 409
 410 token charnames { \s* [<charname><.ws>] ** [','\s*] }
 411
 412 token charspec {
 413     [
 414     | :dba('character name') '[' ~ ']' <charnames>
 415     | \d+
 416     | <[ ?..Z \\.._ ]>
 417     | <?> <.sorry: "Unrecognized \\c character"> .
 418     ]
 419 }
 420
 421 proto token backslash {*}
 422 proto token escape {*}
 423 token starter { <!> }
 424 token escape:none { <!> }
 425
 426 # and this is what makes nibbler polymorphic...
 427 method nibble ($lang) {
 428     self.cursor_fresh($lang).nibbler;
 429 }
 430
 431 # note: polymorphic over many quote languages, we hope
 432 token nibbler {
 433     :my $text = '';
 434     :my $from = self.pos;
 435     :my $to = $from;
 436     :my @nibbles = ();
 437     :my $multiline = 0;
 438     { $.from = self.pos; }
 439     [ <!before <stopper> >
 440         [
 441         || <starter> <nibbler> <stopper>
 442                         {{
 443                             push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to;
 444
 445                             my $n = $<nibbler>[*-1]<nibbles>;
 446                             my @n = @$n;
 447
 448                             push @nibbles, $<starter>;
 449                             push @nibbles, @n;
 450                             push @nibbles, $<stopper>;
 451
 452                             $text = '';
 453                             $to = $from = $¢.pos;
 454                         }}
 455         || <escape>     {{
 456                             push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to;
 457                             push @nibbles, $<escape>[*-1];
 458                             $text = '';
 459                             $to = $from = $¢.pos;
 460                         }}
 461         || .
 462                         {{
 463                             my $ch = substr($*ORIG, $¢.pos-1, 1);
 464                             $text ~= $ch;
 465                             $to = $¢.pos;
 466                             if $ch ~~ "\n" {
 467                                 $multiline++;
 468                             }
 469                         }}
 470         ]
 471     ]*
 472     {{
 473         push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to or !@nibbles;
 474         $<nibbles> = \@nibbles;
 475         $.pos = $¢.pos;
 476         $<nibbler> :delete;
 477         $<escape> :delete;
 478         $<starter> :delete;
 479         $<stopper> :delete;
 480         $*LAST_NIBBLE = $¢;
 481         $*LAST_NIBBLE_MULTILINE = $¢ if $multiline;
 482     }}
 483 }
 484
 485 token babble ($l) {
 486     :my $lang = $l;
 487     :my $start;
 488     :my $stop;
 489
 490     <.ws>
 491     [ <quotepair> <.ws>
 492         {
 493             my $kv = $<quotepair>[*-1];
 494             $lang = ($lang.tweak($kv.<k>, $kv.<v>)
 495                 or $lang.sorry("Unrecognized adverb :" ~ $kv.<k> ~ '(' ~ $kv.<v> ~ ')'));
 496         }
 497     ]*
 498
 499     {
 500         ($start,$stop) = $¢.peek_delimiters();
 501         $lang = $start ne $stop ?? $lang.balanced($start,$stop)
 502                                 !! $lang.unbalanced($stop);
 503         $<B> = [$lang,$start,$stop];
 504     }
 505 }
 506
 507 our @herestub_queue;
 508
 509 class Herestub {
 510     has Str $.delim;
 511     has $.orignode;
 512     has $.lang;
 513 } # end class
 514
 515 role herestop {
 516     token stopper { ^^ {} $<ws>=(\h*?) $*DELIM \h* <.unv>?? $$ \v? }
 517 } # end role
 518
 519 # XXX be sure to temporize @herestub_queue on reentry to new line of heredocs
 520
 521 method heredoc () {
 522     my $*CTX ::= self.callm if $*DEBUG +& DEBUG::trace_call;
 523     my $here = self;
 524     while my $herestub = shift @herestub_queue {
 525         my $*DELIM = $herestub.delim;
 526         my $lang = $herestub.lang.mixin( ::herestop );
 527         my $doc;
 528         if ($doc) = $here.nibble($lang) {
 529             $here = $doc.trim_heredoc();
 530             $herestub.orignode<doc> = $doc;
 531         }
 532         else {
 533             self.panic("Ending delimiter $*DELIM not found");
 534         }
 535     }
 536     return self.cursor($here.pos);  # return to initial type
 537 }
 538
 539 token quibble ($l) {
 540     :my ($lang, $start, $stop);
 541     <babble($l)>
 542     { my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
 543
 544     $start <nibble($lang)> [ $stop || <.panic: "Couldn't find terminator $stop"> ]
 545
 546     {{
 547         if $lang<_herelang> {
 548             push @herestub_queue,
 549                 ::Herestub.new(
 550                     delim => $<nibble><nibbles>[0]<TEXT>,
 551                     orignode => $¢,
 552                     lang => $lang<_herelang>,
 553                 );
 554         }
 555     }}
 556 }
 557
 558 token quotepair {
 559     :my $key;
 560     :my $value;
 561
 562     ':'
 563     :dba('colon pair (restricted)')
 564     [
 565     | '!' <identifier> [ <?before '('> <.sorry: "Argument not allowed on negated pair"> <circumfix> ]?
 566         { $key = $<identifier>.Str; $value = 0; }
 567     | <identifier>
 568         { $key = $<identifier>.Str; }
 569         [
 570         || <.unsp>? <?before '('> <circumfix> { $value = $<circumfix>; }
 571         || { $value = 1; }
 572         ]
 573     | $<n>=(\d+) $<id>=(<[a..z]>+) [ <?before '('> <.sorry: "2nd argument not allowed on pair"> <circumfix> ]?
 574         { $key = $<id>.Str; $value = $<n>.Str; }
 575     ]
 576     { $<k> = $key; $<v> = $value; }
 577 }
 578
 579 token quote:sym<' '>   { :dba('single quotes') "'" ~ "'" <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q).unbalanced("'"))> }
 580 token quote:sym<" ">   { :dba('double quotes') '"' ~ '"' <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).unbalanced('"'))> }
 581
 582 token circumfix:sym<« »>   { :dba('shell-quote words') '«' ~ '»' <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).tweak(:ww).balanced('«','»'))> }
 583 token circumfix:sym«<< >>» { :dba('shell-quote words') '<<' ~ '>>' <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).tweak(:ww).balanced('<<','>>'))> }
 584 token circumfix:sym«< >»   { :dba('quote words') '<' ~ '>'
 585     [
 586         [ <?before 'STDIN>' > <.obs('<STDIN>', '$' ~ '*IN.lines')> ]?  # XXX fake out gimme5
 587         [ <?before '>' > <.obs('<>', "lines() to read input,\n  or ('') to represent the null string,\n  or () to represent Nil")> ]?
 588         <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q).tweak(:w).balanced('<','>'))>
 589     ]
 590 }
 591
 592 ##################
 593 # Lexer routines #
 594 ##################
 595
 596 token ws {
 597     :temp $*STUB = return self if @*MEMOS[self.pos]<ws> :exists;
 598     :my $startpos = self.pos;
 599     :my $*HIGHEXPECT = {};
 600
 601     :dba('whitespace')
 602     [
 603         | \h+ <![\#\s\\]> { @*MEMOS[$¢.pos]<ws> = $startpos; }   # common case
 604         | <?before \w> <?after \w> :::
 605             { @*MEMOS[$startpos]<ws>:delete; }
 606             <.sorry: "Whitespace is required between alphanumeric tokens">        # must \s+ between words
 607     ]
 608     ||
 609     [
 610     | <.unsp>
 611     | <.vws> <.heredoc>
 612     | <.unv>
 613     | $ { $¢.moreinput }
 614     ]*
 615
 616     {{
 617         if ($¢.pos == $startpos) {
 618             @*MEMOS[$¢.pos]<ws>:delete;
 619         }
 620         else {
 621             @*MEMOS[$¢.pos]<ws> = $startpos;
 622             @*MEMOS[$¢.pos]<endstmt> = @*MEMOS[$startpos]<endstmt>
 623                 if @*MEMOS[$startpos]<endstmt> :exists;
 624         }
 625     }}
 626 }
 627
 628 token unsp {
 629     \\ <?before [\s|'#'] >
 630     :dba('unspace')
 631     [
 632     | <.vws>
 633     | <.unv>
 634     | $ { $¢.moreinput }
 635     ]*
 636 }
 637
 638 token vws {
 639     :dba('vertical whitespace')
 640     [
 641         [
 642         | \v
 643         | '#DEBUG -1' { say "DEBUG"; $*DEBUG = -1; } \V* \v
 644         | '<<<<<<<' :: <?before [.*? \v '=======']: .*? \v '>>>>>>>' > <.sorry: 'Found a version control conflict marker'> \V* \v
 645         | '=======' :: .*? \v '>>>>>>>' \V* \v   # ignore second half
 646         ]
 647     ]+
 648 }
 649
 650 # We provide two mechanisms here:
 651 # 1) define $*moreinput, or
 652 # 2) override moreinput method
 653 method moreinput () {
 654     $*moreinput.() if $*moreinput;
 655     self;
 656 }
 657
 658 token unv {
 659    :dba('horizontal whitespace')
 660    [
 661    | \h+
 662    | <?before \h* '=' [ \w | '\\'] > ^^ <.pod_comment>
 663    | \h* <comment>
 664    ]
 665 }
 666
 667 token comment:sym<#`(...)> {
 668     '#`' :: [ <?opener> || <.panic: "Opening bracket is required for #` comment"> ]
 669     <.quibble($¢.cursor_fresh( %*LANG<Q> ))>
 670 }
 671
 672 token comment:sym<#(...)> {
 673     '#' <?opener>
 674     <.suppose
 675         <quibble($¢.cursor_fresh( %*LANG<Q> ))>
 676         <!before <[,;:]>* \h* [ '#' | $$ ] >   # extra stuff on line after closer?
 677     >
 678     <.worry: "Embedded comment seems to be missing backtick"> <!>
 679 }
 680
 681 token comment:sym<#=(...)> {
 682     '#=' <?opener> ::
 683     <quibble($¢.cursor_fresh( %*LANG<Q> ))>
 684 }
 685
 686 token comment:sym<#=> {
 687    '#=' :: $<attachment> = [\N*]
 688 }
 689
 690 token comment:sym<#> {
 691    '#' {} \N*
 692 }
 693
 694 token ident {
 695     <.alpha> \w*
 696 }
 697
 698 token apostrophe {
 699     <[ ' \- ]>
 700 }
 701
 702 token identifier {
 703     <.ident> [ <.apostrophe> <.ident> ]*
 704 }
 705
 706 # XXX We need to parse the pod eventually to support $= variables.
 707
 708 token pod_comment {
 709     ^^ \h* '=' <.unsp>?
 710     [
 711     | 'begin' \h+ <identifier> ::
 712         [
 713         || .*? "\n" [ :r \h* '=' <.unsp>? 'end' \h+ $<identifier> » \N* ]
 714         || <?{ $<identifier>.Str eq 'END'}> .*
 715         || { my $id = $<identifier>.Str; self.panic("=begin $id without matching =end $id"); }
 716         ]
 717     | 'begin' » :: \h* [ $$ || '#' || <.sorry: "Unrecognized token after =begin"> \N* ]
 718         [ .*? "\n" \h* '=' <.unsp>? 'end' » \N* || { self.panic("=begin without matching =end"); } ]
 719
 720     | 'for' » :: \h* [ <identifier> || $$ || '#' || <.sorry: "Unrecognized token after =for"> \N* ]
 721         [.*?  ^^ \h* $$ || .*]
 722     | ::
 723         [ <?before .*? ^^ '=cut' » > <.panic: "Obsolescent pod format, please use =begin/=end instead"> ]?
 724         [<alpha>||\s||<.sorry: "Illegal pod directive">]
 725         \N*
 726     ]
 727 }
 728
 729 # suppress fancy end-of-line checking
 730 token embeddedblock {
 731     # encapsulate braided languages
 732     :temp %*LANG;
 733     :my $*SIGNUM;
 734     :my $*GOAL ::= '}';
 735     :temp $*CURLEX;
 736
 737     :dba('embedded block')
 738
 739     <.newlex>
 740     <.finishlex>
 741     '{' :: [ :lang(%*LANG<MAIN>) <statementlist> ]
 742     [ '}' || <.panic: "Unable to parse statement list; couldn't find right brace"> ]
 743 }
 744
 745 token binints { [<.ws><binint><.ws>] ** ',' }
 746
 747 token binint {
 748     <[ 0..1 ]>+ [ _ <[ 0..1 ]>+ ]*
 749 }
 750
 751 token octints { [<.ws><octint><.ws>] ** ',' }
 752
 753 token octint {
 754     <[ 0..7 ]>+ [ _ <[ 0..7 ]>+ ]*
 755 }
 756
 757 token hexints { [<.ws><hexint><.ws>] ** ',' }
 758
 759 token hexint {
 760     <[ 0..9 a..f A..F ]>+ [ _ <[ 0..9 a..f A..F ]>+ ]*
 761 }
 762
 763 token decints { [<.ws><decint><.ws>] ** ',' }
 764
 765 token decint {
 766     \d+ [ _ \d+ ]*
 767 }
 768
 769 token integer {
 770     [
 771     | 0 [ b '_'? <binint>
 772         | o '_'? <octint>
 773         | x '_'? <hexint>
 774         | d '_'? <decint>
 775         | <decint>
 776             <!!{ $¢.worry("Leading 0 does not indicate octal in Perl 6; please use 0o" ~ $<decint>.Str ~ " if you mean that") }>
 777         ]
 778     | <decint>
 779     ]
 780     <!!before ['.' <?before \s | ',' | '=' | <terminator> > <.sorry: "Decimal point must be followed by digit">]? >
 781     [ <?before '_' '_'+\d> <.sorry: "Only isolated underscores are allowed inside numbers"> ]?
 782 }
 783
 784 token radint {
 785     [
 786     | <integer>
 787     | <?before ':'\d> <rad_number> <?{
 788                         defined $<rad_number><intpart>
 789                         and
 790                         not defined $<rad_number><fracpart>
 791                     }>
 792     ]
 793 }
 794
 795 token escale {
 796     <[Ee]> <[+\-]>? <decint>
 797 }
 798
 799 # careful to distinguish from both integer and 42.method
 800 token dec_number {
 801     :dba('decimal number')
 802     [
 803     | $<coeff> = [              '.' <frac=.decint> ] <escale>?
 804     | $<coeff> = [<int=.decint> '.' <frac=.decint> ] <escale>?
 805     | $<coeff> = [<int=.decint>                    ] <escale>
 806     ]
 807     [ <?before '.' \d> <.sorry: "Number contains two decimal points (missing 'v' for version number?)"> ['.'\d+]+ ]?
 808     [ <?before '_' '_'+\d> <.sorry: "Only isolated underscores are allowed inside numbers"> ]?
 809 }
 810
 811 token alnumint {
 812     [ <[ 0..9 a..z A..Z ]>+ [ _ <[ 0..9 a..z A..Z ]>+ ]* ]
 813 }
 814
 815 token rad_number {
 816     ':' $<radix> = [\d+] <.unsp>?      # XXX optional dot here?
 817     {}           # don't recurse in lexer
 818     :dba('number in radix notation')
 819     [
 820     || '<'
 821             [
 822             | $<coeff> = [                '.' <frac=.alnumint> ]
 823             | $<coeff> = [<int=.alnumint> '.' <frac=.alnumint> ]
 824             | $<coeff> = [<int=.alnumint>                    ]
 825             ]
 826             [
 827                 '*' <base=.radint>
 828                 [ '**' <exp=.radint> || <.sorry: "Base is missing ** exponent part"> ]
 829             ]?
 830        '>'
 831 #      { make radcalc($<radix>, $<coeff>, $<base>, $<exp>) }
 832     || <?before '['> <circumfix>
 833     || <?before '('> <circumfix>
 834     || <.panic: "Malformed radix number">
 835     ]
 836 }
 837
 838 token terminator:sym<)>
 839     { <sym> <O(|%terminator)> }
 840
 841 token terminator:sym<]>
 842     { ']' <O(|%terminator)> }
 843
 844 token terminator:sym<}>
 845     { '}' <O(|%terminator)> }
 846
 847 # XXX should eventually be derived from current Unicode tables.
 848 constant %open2close = (
 849 "\x0028" => "\x0029",
 850 "\x003C" => "\x003E",
 851 "\x005B" => "\x005D",
 852 "\x007B" => "\x007D",
 853 "\x00AB" => "\x00BB",
 854 "\x0F3A" => "\x0F3B",
 855 "\x0F3C" => "\x0F3D",
 856 "\x169B" => "\x169C",
 857 "\x2018" => "\x2019",
 858 "\x201A" => "\x2019",
 859 "\x201B" => "\x2019",
 860 "\x201C" => "\x201D",
 861 "\x201E" => "\x201D",
 862 "\x201F" => "\x201D",
 863 "\x2039" => "\x203A",
 864 "\x2045" => "\x2046",
 865 "\x207D" => "\x207E",
 866 "\x208D" => "\x208E",
 867 "\x2208" => "\x220B",
 868 "\x2209" => "\x220C",
 869 "\x220A" => "\x220D",
 870 "\x2215" => "\x29F5",
 871 "\x223C" => "\x223D",
 872 "\x2243" => "\x22CD",
 873 "\x2252" => "\x2253",
 874 "\x2254" => "\x2255",
 875 "\x2264" => "\x2265",
 876 "\x2266" => "\x2267",
 877 "\x2268" => "\x2269",
 878 "\x226A" => "\x226B",
 879 "\x226E" => "\x226F",
 880 "\x2270" => "\x2271",
 881 "\x2272" => "\x2273",
 882 "\x2274" => "\x2275",
 883 "\x2276" => "\x2277",
 884 "\x2278" => "\x2279",
 885 "\x227A" => "\x227B",
 886 "\x227C" => "\x227D",
 887 "\x227E" => "\x227F",
 888 "\x2280" => "\x2281",
 889 "\x2282" => "\x2283",
 890 "\x2284" => "\x2285",
 891 "\x2286" => "\x2287",
 892 "\x2288" => "\x2289",
 893 "\x228A" => "\x228B",
 894 "\x228F" => "\x2290",
 895 "\x2291" => "\x2292",
 896 "\x2298" => "\x29B8",
 897 "\x22A2" => "\x22A3",
 898 "\x22A6" => "\x2ADE",
 899 "\x22A8" => "\x2AE4",
 900 "\x22A9" => "\x2AE3",
 901 "\x22AB" => "\x2AE5",
 902 "\x22B0" => "\x22B1",
 903 "\x22B2" => "\x22B3",
 904 "\x22B4" => "\x22B5",
 905 "\x22B6" => "\x22B7",
 906 "\x22C9" => "\x22CA",
 907 "\x22CB" => "\x22CC",
 908 "\x22D0" => "\x22D1",
 909 "\x22D6" => "\x22D7",
 910 "\x22D8" => "\x22D9",
 911 "\x22DA" => "\x22DB",
 912 "\x22DC" => "\x22DD",
 913 "\x22DE" => "\x22DF",
 914 "\x22E0" => "\x22E1",
 915 "\x22E2" => "\x22E3",
 916 "\x22E4" => "\x22E5",
 917 "\x22E6" => "\x22E7",
 918 "\x22E8" => "\x22E9",
 919 "\x22EA" => "\x22EB",
 920 "\x22EC" => "\x22ED",
 921 "\x22F0" => "\x22F1",
 922 "\x22F2" => "\x22FA",
 923 "\x22F3" => "\x22FB",
 924 "\x22F4" => "\x22FC",
 925 "\x22F6" => "\x22FD",
 926 "\x22F7" => "\x22FE",
 927 "\x2308" => "\x2309",
 928 "\x230A" => "\x230B",
 929 "\x2329" => "\x232A",
 930 "\x23B4" => "\x23B5",
 931 "\x2768" => "\x2769",
 932 "\x276A" => "\x276B",
 933 "\x276C" => "\x276D",
 934 "\x276E" => "\x276F",
 935 "\x2770" => "\x2771",
 936 "\x2772" => "\x2773",
 937 "\x2774" => "\x2775",
 938 "\x27C3" => "\x27C4",
 939 "\x27C5" => "\x27C6",
 940 "\x27D5" => "\x27D6",
 941 "\x27DD" => "\x27DE",
 942 "\x27E2" => "\x27E3",
 943 "\x27E4" => "\x27E5",
 944 "\x27E6" => "\x27E7",
 945 "\x27E8" => "\x27E9",
 946 "\x27EA" => "\x27EB",
 947 "\x2983" => "\x2984",
 948 "\x2985" => "\x2986",
 949 "\x2987" => "\x2988",
 950 "\x2989" => "\x298A",
 951 "\x298B" => "\x298C",
 952 "\x298D" => "\x298E",
 953 "\x298F" => "\x2990",
 954 "\x2991" => "\x2992",
 955 "\x2993" => "\x2994",
 956 "\x2995" => "\x2996",
 957 "\x2997" => "\x2998",
 958 "\x29C0" => "\x29C1",
 959 "\x29C4" => "\x29C5",
 960 "\x29CF" => "\x29D0",
 961 "\x29D1" => "\x29D2",
 962 "\x29D4" => "\x29D5",
 963 "\x29D8" => "\x29D9",
 964 "\x29DA" => "\x29DB",
 965 "\x29F8" => "\x29F9",
 966 "\x29FC" => "\x29FD",
 967 "\x2A2B" => "\x2A2C",
 968 "\x2A2D" => "\x2A2E",
 969 "\x2A34" => "\x2A35",
 970 "\x2A3C" => "\x2A3D",
 971 "\x2A64" => "\x2A65",
 972 "\x2A79" => "\x2A7A",
 973 "\x2A7D" => "\x2A7E",
 974 "\x2A7F" => "\x2A80",
 975 "\x2A81" => "\x2A82",
 976 "\x2A83" => "\x2A84",
 977 "\x2A8B" => "\x2A8C",
 978 "\x2A91" => "\x2A92",
 979 "\x2A93" => "\x2A94",
 980 "\x2A95" => "\x2A96",
 981 "\x2A97" => "\x2A98",
 982 "\x2A99" => "\x2A9A",
 983 "\x2A9B" => "\x2A9C",
 984 "\x2AA1" => "\x2AA2",
 985 "\x2AA6" => "\x2AA7",
 986 "\x2AA8" => "\x2AA9",
 987 "\x2AAA" => "\x2AAB",
 988 "\x2AAC" => "\x2AAD",
 989 "\x2AAF" => "\x2AB0",
 990 "\x2AB3" => "\x2AB4",
 991 "\x2ABB" => "\x2ABC",
 992 "\x2ABD" => "\x2ABE",
 993 "\x2ABF" => "\x2AC0",
 994 "\x2AC1" => "\x2AC2",
 995 "\x2AC3" => "\x2AC4",
 996 "\x2AC5" => "\x2AC6",
 997 "\x2ACD" => "\x2ACE",
 998 "\x2ACF" => "\x2AD0",
 999 "\x2AD1" => "\x2AD2",
1000 "\x2AD3" => "\x2AD4",
1001 "\x2AD5" => "\x2AD6",
1002 "\x2AEC" => "\x2AED",
1003 "\x2AF7" => "\x2AF8",
1004 "\x2AF9" => "\x2AFA",
1005 "\x2E02" => "\x2E03",
1006 "\x2E04" => "\x2E05",
1007 "\x2E09" => "\x2E0A",
1008 "\x2E0C" => "\x2E0D",
1009 "\x2E1C" => "\x2E1D",
1010 "\x2E20" => "\x2E21",
1011 "\x3008" => "\x3009",
1012 "\x300A" => "\x300B",
1013 "\x300C" => "\x300D",
1014 "\x300E" => "\x300F",
1015 "\x3010" => "\x3011",
1016 "\x3014" => "\x3015",
1017 "\x3016" => "\x3017",
1018 "\x3018" => "\x3019",
1019 "\x301A" => "\x301B",
1020 "\x301D" => "\x301E",
1021 "\xFD3E" => "\xFD3F",
1022 "\xFE17" => "\xFE18",
1023 "\xFE35" => "\xFE36",
1024 "\xFE37" => "\xFE38",
1025 "\xFE39" => "\xFE3A",
1026 "\xFE3B" => "\xFE3C",
1027 "\xFE3D" => "\xFE3E",
1028 "\xFE3F" => "\xFE40",
1029 "\xFE41" => "\xFE42",
1030 "\xFE43" => "\xFE44",
1031 "\xFE47" => "\xFE48",
1032 "\xFE59" => "\xFE5A",
1033 "\xFE5B" => "\xFE5C",
1034 "\xFE5D" => "\xFE5E",
1035 "\xFF08" => "\xFF09",
1036 "\xFF1C" => "\xFF1E",
1037 "\xFF3B" => "\xFF3D",
1038 "\xFF5B" => "\xFF5D",
1039 "\xFF5F" => "\xFF60",
1040 "\xFF62" => "\xFF63",
1041 );
1042
1043 constant %close2open = invert %open2close;
1044
1045 token opener {
1046   <[
1047 \x0028 \x003C \x005B \x007B \x00AB \x0F3A \x0F3C \x169B \x2018 \x201A \x201B
1048 \x201C \x201E \x201F \x2039 \x2045 \x207D \x208D \x2208 \x2209 \x220A \x2215
1049 \x223C \x2243 \x2252 \x2254 \x2264 \x2266 \x2268 \x226A \x226E \x2270 \x2272
1050 \x2274 \x2276 \x2278 \x227A \x227C \x227E \x2280 \x2282 \x2284 \x2286 \x2288
1051 \x228A \x228F \x2291 \x2298 \x22A2 \x22A6 \x22A8 \x22A9 \x22AB \x22B0 \x22B2
1052 \x22B4 \x22B6 \x22C9 \x22CB \x22D0 \x22D6 \x22D8 \x22DA \x22DC \x22DE \x22E0
1053 \x22E2 \x22E4 \x22E6 \x22E8 \x22EA \x22EC \x22F0 \x22F2 \x22F3 \x22F4 \x22F6
1054 \x22F7 \x2308 \x230A \x2329 \x23B4 \x2768 \x276A \x276C \x276E \x2770 \x2772
1055 \x2774 \x27C3 \x27C5 \x27D5 \x27DD \x27E2 \x27E4 \x27E6 \x27E8 \x27EA \x2983
1056 \x2985 \x2987 \x2989 \x298B \x298D \x298F \x2991 \x2993 \x2995 \x2997 \x29C0
1057 \x29C4 \x29CF \x29D1 \x29D4 \x29D8 \x29DA \x29F8 \x29FC \x2A2B \x2A2D \x2A34
1058 \x2A3C \x2A64 \x2A79 \x2A7D \x2A7F \x2A81 \x2A83 \x2A8B \x2A91 \x2A93 \x2A95
1059 \x2A97 \x2A99 \x2A9B \x2AA1 \x2AA6 \x2AA8 \x2AAA \x2AAC \x2AAF \x2AB3 \x2ABB
1060 \x2ABD \x2ABF \x2AC1 \x2AC3 \x2AC5 \x2ACD \x2ACF \x2AD1 \x2AD3 \x2AD5 \x2AEC
1061 \x2AF7 \x2AF9 \x2E02 \x2E04 \x2E09 \x2E0C \x2E1C \x2E20 \x3008 \x300A \x300C
1062 \x300E \x3010 \x3014 \x3016 \x3018 \x301A \x301D \xFD3E \xFE17 \xFE35 \xFE37
1063 \xFE39 \xFE3B \xFE3D \xFE3F \xFE41 \xFE43 \xFE47 \xFE59 \xFE5B \xFE5D \xFF08
1064 \xFF1C \xFF3B \xFF5B \xFF5F \xFF62
1065   ]>
1066 }
1067
1068 grammar P6 is STD {
1069
1070     ###################
1071     # Top-level rules #
1072     ###################
1073
1074     # Note: we only check for the stopper.  We don't check for ^ because
1075     # we might be embedded in something else.
1076     rule comp_unit {
1077         :my $*begin_compunit = 1;
1078         :my $*endargs = -1;
1079         :my %*LANG;
1080         :my $*PKGDECL ::= "";
1081         :my $*IN_DECL = '';
1082         :my $*DECLARAND;
1083         :my $*OFTYPE;
1084         :my $*NEWPKG;
1085         :my $*NEWLEX;
1086         :my $*QSIGIL ::= '';
1087         :my $*IN_META = '';
1088         :my $*QUASIMODO;
1089         :my $*SCOPE = "";
1090         :my $*LEFTSIGIL;
1091         :my $*PRECLIM;
1092         :my %*MYSTERY = ();
1093         :my $*INVOCANT_OK;
1094         :my $*INVOCANT_IS;
1095         :my $*CURLEX;
1096         :my $*MULTINESS = '';
1097         :my $*SIGNUM = 0;
1098         :my $*MONKEY_TYPING = False;
1099         :my %*WORRIES;
1100         :my @*WORRIES;
1101         :my $*FATALS = 0;
1102         :my $*IN_SUPPOSE = False;
1103
1104         :my $*CURPKG;
1105         {{
1106
1107             %*LANG<MAIN>    = ::STD::P6 ;
1108             %*LANG<Q>       = ::STD::Q ;
1109             %*LANG<Quasi>   = ::STD::Quasi ;
1110             %*LANG<Regex>   = ::STD::Regex ;
1111             %*LANG<P5>      = ::STD::P5 ;
1112             %*LANG<P5Regex> = ::STD::P5::Regex ;
1113
1114             @*WORRIES = ();
1115             self.load_setting($*SETTINGNAME);
1116             my $oid = $*SETTING.id;
1117             my $id = 'MY:file<' ~ $*FILE<name> ~ '>';
1118             $*CURLEX = Stash.new(
1119                 'OUTER::' => [$oid],
1120                 '!file' => $*FILE, '!line' => 0,
1121                 '!id' => [$id],
1122             );
1123             $ALL.{$id} = $*CURLEX;
1124             $*UNIT = $*CURLEX;
1125             $ALL.<UNIT> = $*UNIT;
1126             self.finishlex;
1127             # $¢ = self.cursor_fresh($*CURLEX<$?LANGNAME>);
1128         }}
1129         <.unitstart>
1130         <statementlist>
1131         [ <?unitstopper> || <.panic: "Confused"> ]
1132         # "CHECK" time...
1133         {{
1134             $¢.explain_mystery();
1135             $¢.<LEX> = $*CURLEX;
1136             if @*WORRIES {
1137                 note "Potential difficulties:\n  " ~ join( "\n  ", @*WORRIES) ~ "\n";
1138             }
1139             die "Check failed\n" if $*FATALS;
1140         }}
1141     }
1142
1143     # Note: because of the possibility of placeholders we can't determine arity of
1144     # the block syntactically, so this must be determined via semantic analysis.
1145     # Also, pblocks used in an if/unless statement do not treat $_ as a placeholder,
1146     # while most other blocks treat $_ as equivalent to $^x.  Therefore the first
1147     # possible place to check arity is not here but in the rule that calls this
1148     # rule.  (Could also be done in a later pass.)
1149
1150     token pblock () {
1151         :temp $*CURLEX;
1152         :dba('parameterized block')
1153         [<?before <.lambda> | '{' > ||
1154             {{
1155                 if $*BORG and $*BORG.<block> {
1156                     if $*BORG.<name> {
1157                         my $m = "Function '" ~ $*BORG.<name> ~ "' needs parens to avoid gobbling block" ~ $*BORG.<culprit>.locmess;
1158                         $*BORG.<block>.panic($m ~ "\nMissing block (apparently gobbled by '" ~ $*BORG.<name> ~ "')");
1159                     }
1160                     else {
1161                         my $m = "Expression needs parens to avoid gobbling block" ~ $*BORG.<culprit>.locmess;
1162                         $*BORG.<block>.panic($m ~ "\nMissing block (apparently gobbled by expression)");
1163                     }
1164                 }
1165                 elsif %*MYSTERY {
1166                     $¢.panic("Missing block (apparently gobbled by undeclared routine?)");
1167                 }
1168                 else {
1169                     $¢.panic("Missing block");
1170                 }
1171             }}
1172         ]
1173         [
1174         | <lambda>
1175             <.newlex(1)>
1176             <signature(1)>
1177             <blockoid>
1178             <.getsig>
1179         | <?before '{'>
1180             <.newlex(1)>
1181             <blockoid>
1182             <.getsig>
1183         ]
1184     }
1185
1186     # this is a hook for subclasses
1187     token unitstart { <?> }
1188     token lambda { '->' | '<->' }
1189
1190     # Look for an expression followed by a required lambda.
1191     token xblock {
1192         :my $*GOAL ::= '{';
1193         :my $*BORG = {};
1194         <EXPR>
1195         { $*BORG.<culprit> //= $<EXPR>.cursor(self.pos) }
1196         <.ws>
1197         <pblock>
1198     }
1199
1200     token block () {
1201         :temp $*CURLEX;
1202         :dba('scoped block')
1203         [ <?before '{' > || <.panic: "Missing block"> ]
1204         <.newlex>
1205         <blockoid>
1206         <.checkyada>
1207     }
1208
1209     token blockoid {
1210         # encapsulate braided languages
1211         :temp %*LANG;
1212         :my $*SIGNUM;
1213
1214         <.finishlex>
1215         [
1216         | '{YOU_ARE_HERE}' <.you_are_here>
1217         | :dba('block') '{' ~ '}' <statementlist> :: <.curlycheck>
1218         | <?terminator> <.panic: 'Missing block'>
1219         | <?> <.panic: "Malformed block">
1220         ]
1221     }
1222
1223     token curlycheck {
1224         [
1225         || <?before \h* $$>  # (usual case without comments)
1226             { @*MEMOS[$¢.pos]<endstmt> = 2; }
1227         || <?before \h* <[\\,:]>>
1228         || <.unv> $$
1229             { @*MEMOS[$¢.pos]<endstmt> = 2; }
1230         || <.unsp>? { @*MEMOS[$¢.pos]<endargs> = 1; }
1231         ]
1232     }
1233
1234     token regex_block {
1235         # encapsulate braided languages
1236         :temp %*LANG;
1237         :temp %*RX;
1238
1239         :my $lang = %*LANG<Regex>;
1240         :my $*GOAL ::= '}';
1241
1242         [ <quotepair> <.ws>
1243             {
1244                 my $kv = $<quotepair>[*-1];
1245                 $lang = ($lang.tweak($kv.<k>, $kv.<v>)
1246                     or $lang.panic("Unrecognized adverb :" ~ $kv.<k> ~ '(' ~ $kv.<v> ~ ')'));
1247             }
1248         ]*
1249
1250         [
1251         | '{*}' <?{ $*MULTINESS eq 'proto' }> { $¢.<onlystar> = 1 }
1252         | [
1253             '{'
1254             <nibble( $¢.cursor_fresh($lang).unbalanced('}') )>
1255             [ '}' || <.panic: "Unable to parse regex; couldn't find right brace"> ]
1256           ]
1257         ]
1258
1259         <.curlycheck>
1260     }
1261
1262     # statement semantics
1263     rule statementlist {
1264         :my $*INVOCANT_OK = 0;
1265         :temp $*MONKEY_TYPING;
1266         :dba('statement list')
1267
1268         [
1269         | $
1270         | <?before <[\)\]\}]>>
1271         | [<statement><eat_terminator> ]*
1272                 { self.mark_sinks($<statement>) }
1273         ]
1274     }
1275
1276     # embedded semis, context-dependent semantics
1277     rule semilist {
1278         :my $*INVOCANT_OK = 0;
1279         :dba('semicolon list')
1280         [
1281         | <?before <[\)\]\}]>>
1282         | [<statement><eat_terminator> ]*
1283         ]
1284     }
1285
1286
1287     token label {
1288         :my $label;
1289         <identifier> ':' <?before \s> <.ws>
1290
1291         [ <?{ $¢.is_name($label = $<identifier>.Str) }>
1292           <.sorry("Illegal redeclaration of '$label'")>
1293         ]?
1294
1295         # add label as a pseudo constant
1296         {{ $¢.add_constant($label,self.label_id); }}
1297
1298     }
1299
1300     token statement {
1301         :my $*endargs = -1;
1302         :my $*QSIGIL ::= 0;
1303         <!before <[\)\]\}]> >
1304         <!stopper>
1305
1306         # this could either be a statement that follows a declaration
1307         # or a statement that is within the block of a code declaration
1308         <!!{ $*LASTSTATE = $¢.pos; $¢ = %*LANG<MAIN>.bless($¢); }>
1309
1310         [
1311         | <label> <statement>
1312         | <statement_control>
1313         | <EXPR>
1314             :dba('statement end')
1315             [
1316             || <?{ (@*MEMOS[$¢.pos]<endstmt> // 0) == 2 }>   # no mod after end-line curly
1317             ||
1318                 :dba('statement modifier')
1319                 <.ws>
1320                 [
1321                 | <statement_mod_loop>
1322                     {{
1323                         my $sp = $<EXPR><statement_prefix>;
1324                         if $sp and $sp<sym> eq 'do' {
1325                            my $s = $<statement_mod_loop>[0]<sym>;
1326                            $¢.obs("do...$s" ,"repeat...$s");
1327                         }
1328                     }}
1329                 | <statement_mod_cond>
1330                     :dba('statement modifier loop')
1331                     [
1332                     || <?{ (@*MEMOS[$¢.pos]<endstmt> // 0) == 2 }>
1333                     || <.ws> <statement_mod_loop>?
1334                     ]
1335                 ]?
1336             ]
1337         | <?before ';'>
1338         | <?before <stopper> >
1339         | {} <.panic: "Bogus statement">
1340         ]
1341
1342         # Is there more on same line after a block?
1343         [ <?{ (@*MEMOS[@*MEMOS[$¢.pos]<ws>//$¢.pos]<endargs>//0) == 1 }>
1344             \h*
1345             <!before ';' | ')' | ']' | '}' >
1346             <!infixstopper>
1347             { $*HIGHWATER = $¢.pos = @*MEMOS[$¢.pos]<ws>//$¢.pos; }
1348             <.panic: "Strange text after block (missing comma, semicolon, comment marker?)">
1349         ]?
1350     }
1351
1352     token eat_terminator {
1353         [
1354         || ';'
1355         || <?{ (@*MEMOS[$¢.pos]<endstmt>//0) >= 2 }> <.ws>
1356         || <?before ')' | ']' | '}' >
1357         || $
1358         || <?stopper>
1359         || <?before <.suppose <statement_control> > > <.backup_ws> { $*HIGHWATER = -1; } <.panic: "Missing semicolon">
1360         || <.panic: "Confused">
1361         ]
1362     }
1363
1364     # undo any line transition
1365     method backup_ws () {
1366         if @*MEMOS[self.pos]<ws> {
1367             return self.cursor(@*MEMOS[self.pos]<ws>);
1368         }
1369         return self;
1370     }
1371
1372     #####################
1373     # statement control #
1374     #####################
1375
1376     token statement_control:need {
1377         :my $longname;
1378         <sym>:s
1379         [
1380         |<version>
1381         |<module_name>
1382             {{
1383                 my $*IN_DECL = 'use';
1384                 my $*SCOPE = 'use';
1385                 $longname = $<module_name>[*-1]<longname>;
1386                 $¢.do_need($longname<name>);
1387             }}
1388         ] ** ','
1389     }
1390
1391     token statement_control:import {
1392         :my $*IN_DECL = 'use';
1393         :my $*SCOPE = 'use';
1394         <sym> <.ws>
1395         <term>
1396         [
1397         || <.spacey> <arglist>
1398             {{
1399                 my %*MYSTERY;
1400                 $¢.do_import($<term>, $<arglist>);
1401                 $¢.explain_mystery();
1402             }}
1403         || {{ $¢.do_import($<term>, ''); }}
1404         ]
1405         <.ws>
1406     }
1407
1408     token statement_control:use {
1409         :my $longname;
1410         :my $*IN_DECL = 'use';
1411         :my $*SCOPE = 'use';
1412         :my %*MYSTERY;
1413         <sym> <.ws>
1414         [
1415         | <version>
1416         | <module_name>
1417             {{
1418                 $longname = $<module_name><longname>;
1419                 if $longname.Str eq 'MONKEY_TYPING' {
1420                     $*MONKEY_TYPING = True;
1421                 }
1422             }}
1423             [
1424             || <.spacey> <arglist>
1425                 {{
1426                     $¢.do_use($longname<name>, $<arglist>);
1427                 }}
1428             || {{ $¢.do_use($longname<name>, ''); }}
1429             ]
1430         ]
1431         <.ws>
1432         <.explain_mystery>
1433     }
1434
1435
1436     token statement_control:no {
1437         :my %*MYSTERY;
1438         <sym> <.ws>
1439         <module_name>[<.spacey><arglist>]?
1440         <.ws>
1441         <.explain_mystery>
1442     }
1443
1444
1445     token statement_control:if {
1446         <sym> :s
1447         <xblock>
1448         [
1449             [
1450             | 'else'\h*'if' <.sorry: "Please use 'elsif'">
1451             | 'elsif'<?spacey> <elsif=.xblock>
1452             ]
1453         ]*
1454         [
1455             'else'<?spacey> <else=.pblock>
1456         ]?
1457     }
1458
1459
1460     token statement_control:unless {
1461         <sym> :s
1462         <xblock>
1463         [ <!before 'else'> || <.panic: "\"unless\" does not take \"else\" in Perl 6; please rewrite using \"if\""> ]
1464     }
1465
1466
1467     token statement_control:while {
1468         <sym> :s
1469         [ <?before '(' ['my'? '$'\w+ '=']? '<' '$'?\w+ '>' ')'>   #'
1470             <.panic: "This appears to be Perl 5 code"> ]?
1471         <xblock>
1472     }
1473
1474
1475     token statement_control:until {
1476         <sym> :s
1477         <xblock>
1478     }
1479
1480
1481     token statement_control:repeat {
1482         <sym> :s
1483         [
1484             | $<wu>=['while'|'until']<.spacey>
1485               <xblock>
1486             | <pblock>
1487               $<wu>=['while'|'until'][<.spacey>||<.panic: "Whitespace required after keyword">] <EXPR>
1488         ]
1489     }
1490
1491     token statement_control:loop {
1492         <sym> <.ws>
1493         $<eee> = (
1494             '(' [ :s
1495                 <e1=.EXPR>? ';'
1496                 <e2=.EXPR>? ';'
1497                 <e3=.EXPR>?
1498             ')'||<.panic: "Malformed loop spec">]
1499             [ <?before '{' > <.sorry: "Whitespace required before block"> ]?
1500         )? <.ws>
1501         <block>
1502     }
1503
1504
1505     token statement_control:for {
1506         <sym> :s
1507         [ <?before 'my'? '$'\w+ '(' >
1508             <.panic: "This appears to be Perl 5 code"> ]?
1509         [ <?before '(' <.EXPR>? ';' <.EXPR>? ';' <.EXPR>? ')' >
1510             <.obs('C-style "for (;;)" loop', '"loop (;;)"')> ]?
1511         <xblock>
1512     }
1513
1514     token statement_control:foreach {
1515         <sym> <.obs("'foreach'", "'for'")>
1516     }
1517
1518     token statement_control:given {
1519         <sym> :s
1520         <xblock>
1521     }
1522     token statement_control:when {
1523         <sym> :s
1524         <?before ('True'|'False')»<.dumbsmart($0.Str)>>?
1525         <xblock>
1526     }
1527     rule statement_control:default {<sym> <block> }
1528
1529     token statement_prefix:BEGIN   { :my %*MYSTERY; <sym> <blast> <.explain_mystery> }
1530     token statement_prefix:CHECK   { <sym> <blast> }
1531     token statement_prefix:INIT    { <sym> <blast> }
1532     token statement_prefix:START   { <sym> <blast> }
1533     token statement_prefix:ENTER   { <sym> <blast> }
1534     token statement_prefix:FIRST   { <sym> <blast> }
1535
1536     token statement_prefix:END     { <sym> <blast> }
1537     token statement_prefix:LEAVE   { <sym> <blast> }
1538     token statement_prefix:KEEP    { <sym> <blast> }
1539     token statement_prefix:UNDO    { <sym> <blast> }
1540     token statement_prefix:NEXT    { <sym> <blast> }
1541     token statement_prefix:LAST    { <sym> <blast> }
1542     token statement_prefix:PRE     { <sym> <blast> }
1543     token statement_prefix:POST    { <sym> <blast> }
1544
1545     rule statement_control:CATCH   {<sym> <block> }
1546     rule statement_control:CONTROL {<sym> <block> }
1547     rule statement_control:TEMP    {<sym> <block> }
1548
1549     #######################
1550     # statement modifiers #
1551     #######################
1552
1553     rule modifier_expr { <EXPR> }
1554
1555     rule statement_mod_cond:if     {<sym> <modifier_expr> }
1556     rule statement_mod_cond:unless {<sym> <modifier_expr> }
1557     rule statement_mod_cond:when   {<sym> <?before \h*('True'|'False')»<.dumbsmart($0[0].Str)>>? <modifier_expr> }
1558
1559     rule statement_mod_loop:while {<sym> <modifier_expr> }
1560     rule statement_mod_loop:until {<sym> <modifier_expr> }
1561
1562     rule statement_mod_loop:for   {<sym> <modifier_expr> }
1563     rule statement_mod_loop:given {<sym> <modifier_expr> }
1564
1565     ################
1566     # module names #
1567     ################
1568
1569     token module_name:normal {
1570         <longname>
1571         [ <?before '['> :dba('generic role') '[' ~ ']' <arglist> ]?
1572     }
1573
1574     token vnum {
1575         \d+ | '*'
1576     }
1577
1578     token version:sym<v> {
1579         'v' <?before \d+> :: <vnum> ** '.' '+'?
1580     }
1581
1582     ###############
1583     # Declarators #
1584     ###############
1585
1586     token variable_declarator {
1587         :my $*IN_DECL = 'variable';
1588         :my $*DECLARAND;
1589         :my $var;
1590         <variable>
1591         {
1592             $var = $<variable>.Str;
1593             $¢.add_variable($var);
1594             $*IN_DECL = '';
1595         }
1596         [   # Is it a shaped array or hash declaration?
1597           #  <?{ $<sigil> eq '@' | '%' }>
1598             <.unsp>?
1599             $<shape> = [
1600             | '(' ~ ')' <signature>
1601                 {{
1602                     given substr($var,0,1) {
1603                         when '&' {
1604                             $¢.sorry("The () shape syntax in routine declarations is reserved (maybe use :() to declare a longname?)");
1605                         }
1606                         when '@' {
1607                             $¢.sorry("The () shape syntax in array declarations is reserved");
1608                         }
1609                         when '%' {
1610                             $¢.sorry("The () shape syntax in hash declarations is reserved");
1611                         }
1612                         default {
1613                             $¢.sorry("The () shape syntax in variable declarations is reserved");
1614                         }
1615                     }
1616                 }}
1617             | :dba('shape definition') '[' ~ ']' <semilist>
1618             | :dba('shape definition') '{' ~ '}' <semilist> <.curlycheck>
1619             | <?before '<'> <postcircumfix>
1620             ]*
1621         ]?
1622         <.ws>
1623
1624         <trait>*
1625         <post_constraint>*
1626         <.getdecl>
1627     }
1628
1629     rule scoped ($*SCOPE) {
1630         :dba('scoped declarator')
1631         [
1632         | <declarator>
1633         | <regex_declarator>
1634         | <package_declarator>
1635         | [<typename> ]+
1636             {
1637                 my $t = $<typename>;
1638                 @$t > 1 and $¢.sorry("Multiple prefix constraints not yet supported");
1639                 $*OFTYPE = $t[0];
1640             }
1641             <multi_declarator>
1642         | <multi_declarator>
1643         ]
1644         || <?before <[A..Z]>><longname>{{
1645                 my $t = $<longname>.Str;
1646                 if not $¢.is_known($t) {
1647                     $¢.sorry("In $*SCOPE declaration, typename '$t' must be predeclared (or marked as declarative with :: prefix)");
1648                 }
1649             }}
1650             <!> # drop through
1651         || <.panic: "Malformed $*SCOPE">
1652     }
1653
1654     token scope_declarator:my        { <sym> <scoped('my')> }
1655     token scope_declarator:our       { <sym> <scoped('our')> }
1656     token scope_declarator:anon      { <sym> <scoped('anon')> }
1657     token scope_declarator:state     { <sym> <scoped('state')> }
1658     token scope_declarator:has       { <sym> <scoped('has')> }
1659     token scope_declarator:augment   { <sym> <scoped('augment')> }
1660     token scope_declarator:supersede { <sym> <scoped('supersede')> }
1661
1662
1663     token package_declarator:class {
1664         :my $*PKGDECL ::= 'class';
1665         <sym> <package_def>
1666     }
1667
1668     token package_declarator:grammar {
1669         :my $*PKGDECL ::= 'grammar';
1670         <sym> <package_def>
1671     }
1672
1673     token package_declarator:module {
1674         :my $*PKGDECL ::= 'module';
1675         <sym> <package_def>
1676     }
1677
1678     token package_declarator:package {
1679         :my $*PKGDECL ::= 'package';
1680         <sym> <package_def>
1681     }
1682
1683     token package_declarator:role {
1684         :my $*PKGDECL ::= 'role';
1685         <sym> <package_def>
1686     }
1687
1688     token package_declarator:knowhow {
1689         :my $*PKGDECL ::= 'knowhow';
1690         <sym> <package_def>
1691     }
1692
1693     token package_declarator:slang {
1694         :my $*PKGDECL ::= 'slang';
1695         <sym> <package_def>
1696     }
1697
1698     token package_declarator:require {   # here because of declarational aspects
1699         <sym> <.ws>
1700         [
1701         || <module_name> <EXPR>?
1702         || <EXPR>
1703         ]
1704     }
1705
1706     token package_declarator:trusts {
1707         <sym> <.ws>
1708         <module_name>
1709     }
1710
1711     token package_declarator:sym<also> {
1712         <sym>:s
1713         <trait>+
1714     }
1715
1716     rule package_def {
1717         :my $longname;
1718         :my $*IN_DECL = 'package';
1719         :my $*DECLARAND;
1720         :my $*NEWPKG;
1721         :my $*NEWLEX;
1722         :my $outer = $*CURLEX;
1723         :temp $*CURPKG;
1724         :temp $*CURLEX;
1725         { $*SCOPE ||= 'our'; }
1726         [
1727             [ <longname> { $longname = $<longname>[0]; $¢.add_name($longname<name>.Str); } ]?
1728             <.newlex>
1729             [ :dba('generic role')
1730                 <?{ ($*PKGDECL//'') eq 'role' }>
1731                 '[' ~ ']' <signature>
1732                 { $*IN_DECL = ''; }
1733             ]?
1734             <trait>*
1735             <.getdecl>
1736             [
1737             || <?before '{'>
1738                 [
1739                 {{
1740                     # figure out the actual full package name (nested in outer package)
1741                     if $longname and $*NEWPKG {
1742                         my $shortname = $longname.<name>.Str;
1743                         if $*SCOPE eq 'our' {
1744                             $*CURPKG = $*NEWPKG // $*CURPKG.{$shortname ~ '::'};
1745                             self.deb("added our " ~ $*CURPKG.id) if $*DEBUG +& DEBUG::symtab;
1746                         }
1747                         else {
1748                             $*CURPKG = $*NEWPKG // $*CURPKG.{$shortname ~ '::'};
1749                             self.deb("added my " ~ $*CURPKG.id) if $*DEBUG +& DEBUG::symtab;
1750                         }
1751                     }
1752                     $*begin_compunit = 0;
1753                     $*UNIT<$?LONGNAME> ||= $longname ?? $longname<name>.Str !! '';
1754                 }}
1755                 { $*IN_DECL = ''; }
1756                 <blockoid>
1757                 <.checkyada>
1758                 ]
1759             || <?before ';'>
1760                 [
1761                 || <?{ $*begin_compunit }>
1762                     {{
1763                         $longname orelse $¢.panic("Compilation unit cannot be anonymous");
1764                         $outer == $*UNIT or $¢.panic("Semicolon form of " ~ $*PKGDECL ~ " definition not allowed in subscope;\n  please use block form");
1765                         $*PKGDECL eq 'package' and $¢.panic("Semicolon form of package definition indicates a Perl 5 module; unfortunately,\n  STD doesn't know how to parse Perl 5 code yet");
1766                         my $shortname = $longname.<name>.Str;
1767                         $*CURPKG = $*NEWPKG // $*CURPKG.{$shortname ~ '::'};
1768                         $*begin_compunit = 0;
1769
1770                         # XXX throws away any role sig above
1771                         $*CURLEX = $outer;
1772
1773                         $*UNIT<$?LONGNAME> = $longname<name>.Str;
1774                     }}
1775                     { $*IN_DECL = ''; }
1776                     <statementlist>     # whole rest of file, presumably
1777                 || <.panic: "Too late for semicolon form of " ~ $*PKGDECL ~ " definition">
1778                 ]
1779             || <.panic: "Unable to parse " ~ $*PKGDECL ~ " definition">
1780             ]
1781         ] || <.panic: "Malformed $*PKGDECL">
1782     }
1783
1784     token declarator {
1785         [
1786         | <variable_declarator>
1787             [ <?before <.ws>','<.ws> { @*MEMOS[$¢.pos]<declend> = $*SCOPE; }> ]?
1788         | '(' ~ ')' <signature> <trait>*
1789         | <routine_declarator>
1790         | <regex_declarator>
1791         | <type_declarator>
1792         ]
1793     }
1794
1795     token multi_declarator:multi {
1796         :my $*MULTINESS = 'multi';
1797         <sym> <.ws> [ <declarator> || <routine_def('multi')> || <.panic: 'Malformed multi'> ]
1798     }
1799     token multi_declarator:proto {
1800         :my $*MULTINESS = 'proto';
1801         <sym> <.ws> [ <declarator> || <routine_def('proto')> || <.panic: 'Malformed proto'> ]
1802     }
1803     token multi_declarator:only {
1804         :my $*MULTINESS = 'only';
1805         <sym> <.ws> [ <declarator> || <routine_def('only')> || <.panic: 'Malformed only'> ]
1806     }
1807     token multi_declarator:null {
1808         :my $*MULTINESS = '';
1809         <declarator>
1810     }
1811
1812     token routine_declarator:sub       { <sym> <routine_def('sub')> }
1813     token routine_declarator:method    { <sym> <method_def> }
1814     token routine_declarator:submethod { <sym> <method_def> }
1815     token routine_declarator:macro     { <sym> <macro_def> }
1816
1817     token regex_declarator:regex { <sym>       <regex_def(:!r,:!s)> }
1818     token regex_declarator:token { <sym>       <regex_def(:r,:!s)> }
1819     token regex_declarator:rule  { <sym>       <regex_def(:r,:s)> }
1820
1821     rule multisig {
1822         :my $signum = 0;
1823         :dba('signature')
1824         [
1825             ':'?'(' ~ ')' <signature(++$signum)>
1826         ]
1827         ** '|'
1828     }
1829
1830     method checkyada {
1831         try {
1832             my $statements = self.<blockoid><statementlist><statement>;
1833             my $startsym = $statements[0]<EXPR><sym> // '';
1834             given $startsym {
1835                 when '...' { $*DECLARAND<stub> = 1 }
1836                 when '!!!' { $*DECLARAND<stub> = 1 }
1837                 when '???' { $*DECLARAND<stub> = 1 }
1838                 when '*' {
1839                     if $*MULTINESS eq 'proto' and $statements.elems == 1 {
1840                         self.<blockoid>:delete;
1841                         self.<onlystar> = 1;
1842                     }
1843                 }
1844             }
1845         }
1846         return self;
1847     }
1848
1849     rule routine_def ($d) {
1850         :temp $*CURLEX;
1851         :my $*IN_DECL = $d;
1852         :my $*DECLARAND;
1853         [
1854             [ $<sigil>=['&''*'?] <deflongname>? | <deflongname> ]?
1855             <.newlex(1)>
1856             [ <multisig> | <trait> ]*
1857             [ <!before '{'> <.panic: "Malformed block"> ]?
1858             <!{
1859                 $*IN_DECL = '';
1860             }>
1861             <blockoid>:!s
1862             <.checkyada>
1863             <.getsig>
1864             <.getdecl>
1865         ] || <.panic: "Malformed routine">
1866     }
1867
1868     rule method_def () {
1869         :temp $*CURLEX;
1870         :my $*IN_DECL = 'method';
1871         :my $*DECLARAND;
1872         <.newlex(1)>
1873         [
1874             [
1875             | $<type>=[<[ ! ^ ]>?]<longname> [ <multisig> | <trait> ]*
1876             | <multisig> <trait>*
1877             | <sigil> '.'
1878                 :dba('subscript signature')
1879                 [
1880                 | '(' ~ ')' <signature>
1881                 | '[' ~ ']' <signature>
1882                 | '{' ~ '}' <signature> # don't need curlycheck here
1883                 | <?before '<'> <postcircumfix>
1884                 ]
1885                 <trait>*
1886             | <?>
1887             ]
1888             { $*IN_DECL = ''; }
1889             <blockoid>:!s
1890             <.checkyada>
1891             <.getsig>
1892             <.getdecl>
1893         ] || <.panic: "Malformed method">
1894     }
1895
1896     rule regex_def (:$r, :$s) {
1897         :temp $*CURLEX;
1898         :my $*IN_DECL = 'regex';
1899         :temp %*RX;
1900         :my $*DECLARAND;
1901         { %*RX<s> = $s; %*RX<r> = $r; }
1902         [
1903             [ '&'<deflongname>? | <deflongname> ]?
1904             <.newlex(1)>
1905             [ [ ':'?'(' <signature(1)> ')'] | <trait> ]*
1906             [ <!before '{'> <.panic: "Malformed block"> ]?
1907             { $*IN_DECL = ''; }
1908             <.finishlex>
1909             <regex_block>:!s
1910             <.getsig>
1911             <.getdecl>
1912         ] || <.panic: "Malformed regex">
1913     }
1914
1915     rule macro_def () {
1916         :temp $*CURLEX;
1917         :my $*IN_DECL = 'macro';
1918         :my $*DECLARAND;
1919         [
1920             [ '&'<deflongname>? | <deflongname> ]?
1921             <.newlex(1)>
1922             [ <multisig> | <trait> ]*
1923             [ <!before '{'> <.panic: "Malformed block"> ]?
1924             { $*IN_DECL = ''; }
1925             <blockoid>:!s
1926             <.checkyada>
1927             <.getsig>
1928             <.getdecl>
1929         ] || <.panic: "Malformed macro">
1930     }
1931
1932     rule trait {
1933         :my $*IN_DECL = 0;
1934         [
1935         | <trait_mod>
1936         | <colonpair>
1937         ]
1938     }
1939
1940     token trait_mod:is {
1941         <sym>:s <longname><circumfix>?  # e.g. context<rw> and Array[Int]
1942         {{
1943             if $*DECLARAND {
1944                 my $traitname = $<longname>.Str;
1945                 # XXX eventually will use multiple dispatch
1946                 $*DECLARAND{$traitname} = self.gettrait($traitname, $<circumfix>);
1947             }
1948         }}
1949     }
1950     token trait_mod:hides {
1951         <sym>:s <module_name>
1952     }
1953     token trait_mod:does {
1954         :my $*PKGDECL ::= 'role';
1955         <sym>:s <module_name>
1956     }
1957     token trait_mod:will {
1958         <sym>:s <identifier> <pblock>
1959     }
1960
1961     token trait_mod:of {
1962         ['of'|'returns']:s <typename>
1963         [ <?{ $*DECLARAND<of> }> <.sorry("Extra 'of' type; already declared as type " ~ $*DECLARAND<of>.Str)> ]?
1964         { $*DECLARAND<of> = $<typename>; }
1965     }
1966     token trait_mod:as      { <sym>:s <typename> }
1967     token trait_mod:handles { <sym>:s <term> }
1968
1969     #########
1970     # Nouns #
1971     #########
1972
1973     # (for when you want to tell EXPR that infix already parsed the term)
1974     token nullterm {
1975         <?>
1976     }
1977
1978     token nulltermish {
1979         :dba('null term')
1980         [
1981         | <?stdstopper>
1982         | <term=.termish>
1983             {
1984                 $¢.<PRE>  = $<term><PRE>:delete;
1985                 $¢.<POST> = $<term><POST>:delete;
1986                 $¢.<~CAPS> = $<term><~CAPS>;
1987             }
1988         | <?>
1989         ]
1990     }
1991
1992     token termish {
1993         :my $*SCOPE = "";
1994         :my $*MULTINESS = "";
1995         :my $*OFTYPE;
1996         :my $*VAR;
1997         :dba('prefix or term')
1998         [
1999         | <PRE> [ <!{ my $p = $<PRE>; my @p = @$p; @p[*-1]<O><term> and $<term> = pop @$p }> <PRE> ]*
2000             [ <?{ $<term> }> || <term> || <.panic("Prefix requires an argument")> ]
2001         | <term>
2002         ]
2003
2004         # also queue up any postfixes
2005         :dba('postfix')
2006         [
2007         || <?{ $*QSIGIL }>
2008             [
2009             || <?{ $*QSIGIL eq '$' }> [ [<!before '\\'> <POST>]+! <?after <[ \] } > ) ]> > ]?
2010             ||                          [<!before '\\'> <POST>]+! <?after <[ \] } > ) ]> >
2011             || { $*VAR = 0; }
2012             ]
2013         || <!{ $*QSIGIL }>
2014             <POST>*
2015         ]
2016         {
2017             self.check_variable($*VAR) if $*VAR;
2018             $¢.<~CAPS> = $<term><~CAPS>;
2019         }
2020     }
2021
2022     token term:fatarrow           { <fatarrow> }
2023     token term:variable           { <variable> { $*VAR = $<variable> } }
2024     token term:package_declarator { <package_declarator> }
2025     token term:scope_declarator   { <scope_declarator> }
2026     token term:multi_declarator   { <?before 'multi'|'proto'|'only'> <multi_declarator> }
2027     token term:routine_declarator { <routine_declarator> }
2028     token term:regex_declarator   { <regex_declarator> }
2029     token term:type_declarator    { <type_declarator> }
2030     token term:circumfix          { <circumfix> }
2031     token term:dotty              { <dotty> }
2032     token term:value              { <value> }
2033     token term:capterm            { <capterm> }
2034     token term:sigterm            { <sigterm> }
2035     token term:statement_prefix   { <statement_prefix> }
2036     token term:colonpair          { [ <colonpair> <.ws> ]+ }
2037
2038     token fatarrow {
2039         <key=.identifier> \h* '=>' <.ws> <val=.EXPR(item %item_assignment)>
2040     }
2041
2042     token coloncircumfix ($front) {
2043         [
2044         | '<>' <.worry("Pair with <> really means a Nil value, not null string; use :$front" ~ "('') to represent the null string,\n  or :$front" ~ "() to represent Nil more accurately")>
2045         | <circumfix>
2046         ]
2047     }
2048
2049     token colonpair {
2050         :my $key;
2051         :my $value;
2052
2053         ':'
2054         :dba('colon pair')
2055         [
2056         | '!' :: [ <identifier> || <.panic: "Malformed False pair; expected identifier">]
2057             [ <?before <[ \[ \( \< \{ ]>> <.panic: "Extra argument not allowed; pair already has False argument"> ]?
2058             { $key = $<identifier>.Str; $value = 0; }
2059         | $<num> = [\d+] <identifier> [ <?before <[ \[ \( \< \{ ]>> <.sorry("Extra argument not allowed; pair already has argument of " ~ $<num>.Str)> <.circumfix> ]?
2060         | <identifier>
2061             { $key = $<identifier>.Str; }
2062             [
2063             || <.unsp>? :dba('pair value') <coloncircumfix($key)> { $value = $<coloncircumfix>; }
2064             || { $value = 1; }
2065             ]
2066         | :dba('signature') '(' ~ ')' <fakesignature>
2067         | <coloncircumfix('')>
2068             { $key = ""; $value = $<coloncircumfix>; }
2069         | $<var> = (
2070                 <sigil> {}
2071                 [
2072                 | <twigil>? <desigilname>
2073                 | '<' <desigilname> '>'
2074                 ]
2075             )
2076             { $key = $<var><desigilname>.Str; $value = $<var>; $¢.check_variable($value); }
2077         ]
2078         { $<k> = $key; $<v> = $value; }
2079     }
2080
2081     # Most of these special variable rules are there simply to catch old p5 brainos
2082
2083     token special_variable:sym<$¢> { <sym> }
2084
2085     token special_variable:sym<$!> { <sym> <!before \w> }
2086
2087     token special_variable:sym<$!{ }> {
2088         '$!' '{' ~ '}' [<identifier> | <statementlist>]
2089         {{
2090             my $all = substr($*ORIG, self.pos, $¢.pos - self.pos);
2091             my ($inside) = $all ~~ m!^...\s*(.*?)\s*.$!;
2092             $¢.obs("Perl 5's $all construct", "a smartmatch like \$! ~~ $inside" );
2093         }}
2094     }
2095
2096     token special_variable:sym<$/> {
2097         <sym>
2098         # XXX assuming nobody ever wants to assign $/ directly anymore...
2099         [ <?before \h* '=' <![=]> >
2100             <.obs('$/ variable as input record separator',
2101                  "the filehandle's :irs attribute")>
2102         ]?
2103     }
2104
2105     token special_variable:sym<$~> {
2106         <sym> :: <?before \s | ',' | '=' | <terminator> >
2107         <.obs('$~ variable', 'Form module')>
2108     }
2109
2110     token special_variable:sym<$`> {
2111         <sym> :: <?before \s | ',' | <terminator> >
2112         <.obs('$` variable', 'explicit pattern before <(')>
2113     }
2114
2115     token special_variable:sym<$@> {
2116         <sym> <?before \W> ::
2117         <.obs('$@ variable as eval error', '$!')>
2118     }
2119
2120     token special_variable:sym<$#> {
2121         <sym> ::
2122         [
2123         || (\w+) <.obs("\$#" ~ $0.Str ~ " variable", '@' ~ $0.Str ~ '.end')>
2124         || <.obs('$# variable', '.fmt')>
2125         ]
2126     }
2127     token special_variable:sym<$$> {
2128         <sym> <!alpha> :: <?before \s | ',' | <terminator> >
2129         <.obs('$$ variable', '$*PID')>
2130     }
2131     token special_variable:sym<$%> {
2132         <sym> <!before \w> <!sigil> ::
2133         <.obs('$% variable', 'Form module')>
2134     }
2135
2136     # Note: this works because placeholders are restricted to lowercase
2137     token special_variable:sym<$^X> {
2138         <sigil> '^' $<letter> = [<[A..Z]>] \W
2139         <.obscaret($<sigil>.Str ~ '^' ~ $<letter>.Str, $<sigil>.Str, $<letter>.Str)>
2140     }
2141
2142     token special_variable:sym<$^> {
2143         <sym> :: <?before \s | ',' | '=' | <terminator> >
2144         <.obs('$^ variable', 'Form module')>
2145     }
2146
2147     token special_variable:sym<$&> {
2148         <sym> :: <?before \s | ',' | <terminator> >
2149         <.obs('$& variable', '$/ or $()')>
2150     }
2151
2152     token special_variable:sym<$*> {
2153         <sym> :: <?before \s | ',' | '=' | <terminator> >
2154         <.obs('$* variable', '^^ and $$')>
2155     }
2156
2157     token special_variable:sym<$)> {
2158         <sym> <?{ $*GOAL ne ')' }> <?before \s | ',' | <terminator> >
2159         <.obs('$) variable', '$*EGID')>
2160     }
2161
2162     token special_variable:sym<$-> {
2163         <sym> :: <?before \s | ',' | '=' | <terminator> >
2164         <.obs('$- variable', 'Form module')>
2165     }
2166
2167     token special_variable:sym<$=> {
2168         <sym> :: <?before \s | ',' | '=' | <terminator> >
2169         <.obs('$= variable', 'Form module')>
2170     }
2171
2172     token special_variable:sym<@+> {
2173         <sym> :: <?before \s | ',' | <terminator> >
2174         <.obs('@+ variable', '.to method')>
2175     }
2176
2177     token special_variable:sym<%+> {
2178         <sym> :: <?before \s | ',' | <terminator> >
2179         <.obs('%+ variable', '.to method')>
2180     }
2181
2182     token special_variable:sym<$+[ ]> {
2183         '$+['
2184         <.obs('@+ variable', '.to method')>
2185     }
2186
2187     token special_variable:sym<@+[ ]> {
2188         '@+['
2189         <.obs('@+ variable', '.to method')>
2190     }
2191
2192     token special_variable:sym<@+{ }> {
2193         '@+{'
2194         <.obs('%+ variable', '.to method')>
2195     }
2196
2197     token special_variable:sym<@-> {
2198         <sym> :: <?before \s | ',' | <terminator> >
2199         <.obs('@- variable', '.from method')>
2200     }
2201
2202     token special_variable:sym<%-> {
2203         <sym> :: <?before \s | ',' | <terminator> >
2204         <.obs('%- variable', '.from method')>
2205     }
2206
2207     token special_variable:sym<$-[ ]> {
2208         '$-['
2209         <.obs('@- variable', '.from method')>
2210     }
2211
2212     token special_variable:sym<@-[ ]> {
2213         '@-['
2214         <.obs('@- variable', '.from method')>
2215     }
2216
2217     token special_variable:sym<%-{ }> {
2218         '@-{'
2219         <.obs('%- variable', '.from method')>
2220     }
2221
2222     token special_variable:sym<$+> {
2223         <sym> :: <?before \s | ',' | <terminator> >
2224         <.obs('$+ variable', 'Form module')>
2225     }
2226
2227     token special_variable:sym<${^ }> {
2228         <sigil> '{^' :: $<text>=[.*?] '}'
2229         <.obscaret($<sigil>.Str ~ '{^' ~ $<text>.Str ~ '}', $<sigil>.Str, $<text>.Str)>
2230     }
2231
2232     # XXX should eventually rely on multi instead of nested cases here...
2233     method obscaret (Str $var, Str $sigil, Str $name) {
2234         my $repl;
2235         given $sigil {
2236             when '$' {
2237                 given $name {
2238                     when 'MATCH'         { $repl = '$/' }
2239                     when 'PREMATCH'      { $repl = 'an explicit pattern before <(' }
2240                     when 'POSTMATCH'     { $repl = 'an explicit pattern after )>' }
2241                     when 'ENCODING'      { $repl = '$?ENCODING' }
2242                     when 'UNICODE'       { $repl = '$?UNICODE' }  # XXX ???
2243                     when 'TAINT'         { $repl = '$*TAINT' }
2244                     when 'OPEN'          { $repl = 'filehandle introspection' }
2245                     when 'N'             { $repl = '$-1' } # XXX ???
2246                     when 'L'             { $repl = 'Form module' }
2247                     when 'A'             { $repl = 'Form module' }
2248                     when 'E'             { $repl = '$!.extended_os_error' }
2249                     when 'C'             { $repl = 'COMPILING namespace' }
2250                     when 'D'             { $repl = '$*DEBUGGING' }
2251                     when 'F'             { $repl = '$*SYSTEM_FD_MAX' }
2252                     when 'H'             { $repl = '$?FOO variables' }
2253                     when 'I'             { $repl = '$*INPLACE' } # XXX ???
2254                     when 'O'             { $repl = '$?OS or $*OS' }
2255                     when 'P'             { $repl = 'whatever debugger Perl 6 comes with' }
2256                     when 'R'             { $repl = 'an explicit result variable' }
2257                     when 'S'             { $repl = 'the context function' } # XXX ???
2258                     when 'T'             { $repl = '$*BASETIME' }
2259                     when 'V'             { $repl = '$*PERL_VERSION' }
2260                     when 'W'             { $repl = '$*WARNING' }
2261                     when 'X'             { $repl = '$*EXECUTABLE_NAME' }
2262                     when *               { $repl = "a global form such as $sigil*$name" }
2263                 }
2264             }
2265             when '%' {
2266                 given $name {
2267                     when 'H'             { $repl = '$?FOO variables' }
2268                     when *               { $repl = "a global form such as $sigil*$name" }
2269                 }
2270             }
2271             when * { $repl = "a global form such as $sigil*$name" }
2272         };
2273         return self.obs("$var variable", $repl);
2274     }
2275
2276     token special_variable:sym<::{ }> {
2277         '::' <?before '{'>
2278     }
2279
2280     regex special_variable:sym<${ }> {
2281         <sigil> '{' {} $<text>=[.*?] '}'
2282         {{
2283             my $sigil = $<sigil>.Str;
2284             my $text = $<text>.Str;
2285             my $bad = $sigil ~ '{' ~ $text ~ '}';
2286             $text = $text - 1 if $text ~~ /^\d+$/;
2287             if $text !~~ /^(\w|\:)+$/ {
2288                 return () if $*QSIGIL;
2289                 $¢.obs($bad, $sigil ~ '(' ~ $text ~ ')');
2290             }
2291             elsif $*QSIGIL {
2292                 $¢.obs($bad, '{' ~ $sigil ~ $text ~ '}');
2293             }
2294             else {
2295                 $¢.obs($bad, $sigil ~ $text);
2296             }
2297         }} # always fails, don't need curlycheck here
2298     }
2299
2300     token special_variable:sym<$[> {
2301         <sym> :: <?before \s | ',' | '=' | <terminator> >
2302         <.obs('$[ variable', 'user-defined array indices')>
2303     }
2304
2305     token special_variable:sym<$]> {
2306         <sym> :: <?before \s | ',' | <terminator> >
2307         <.obs('$] variable', '$*PERL_VERSION')>
2308     }
2309
2310     token special_variable:sym<$\\> {
2311         <sym> :: <?before \s | ',' | '=' | <terminator> >
2312         <.obs('$\\ variable', "the filehandle's :ors attribute")>
2313     }
2314
2315     token special_variable:sym<$|> {
2316         <sym> :: <?before \s | ',' | '=' | <terminator> >
2317         <.obs('$| variable', ':autoflush on open')>
2318     }
2319
2320     token special_variable:sym<$:> {
2321         <sym> <?before <[\x20\t\n\],=)}]> >
2322         <.obs('$: variable', 'Form module')>
2323     }
2324
2325     token special_variable:sym<$;> {
2326         <sym> :: <?before \s | ',' | '=' | <terminator> >
2327         <.obs('$; variable', 'real multidimensional hashes')>
2328     }
2329
2330     token special_variable:sym<$'> { #'
2331         <sym> :: <?before \s | ',' | <terminator> >
2332         <.obs('$' ~ "'" ~ 'variable', "explicit pattern after )\x3E")>
2333     }
2334
2335     token special_variable:sym<$"> {
2336         <sym> <!{ $*QSIGIL }>
2337         :: <?before \s | ',' | '=' | <terminator> >
2338         <.obs('$" variable', '.join() method')>
2339     }
2340
2341     token special_variable:sym<$,> {
2342         <sym> :: <?before \s | ',' | <terminator> >
2343         <.obs('$, variable', ".join() method")>
2344     }
2345
2346     token special_variable:sym['$<'] {
2347         <sym> <?before \h* <[ = , ; ? : ! ) \] } ]> <!before \S* '>'> >
2348         <.obs('$< variable', '$*UID')>
2349     }
2350
2351     token special_variable:sym«\$>» {
2352         <sym> :: <?before \s | ',' | <terminator> >
2353         <.obs('$> variable', '$*EUID')>
2354     }
2355
2356     token special_variable:sym<$.> {
2357         <sym> :: <?before \s | ',' | <terminator> >
2358         <.obs('$. variable', "the filehandle's .line method")>
2359     }
2360
2361     token special_variable:sym<$?> {
2362         <sym> :: <?before \s | ',' | <terminator> >
2363         <.obs('$? variable as child error', '$!')>
2364     }
2365
2366     # desigilname should only follow a sigil/twigil
2367
2368     token desigilname {
2369         [
2370         | <?before '$' > <variable> { $*VAR = $<variable>; self.check_variable($*VAR) if substr($*VAR,1,1) ne '$' }
2371         | <?before <[\@\%\&]> <sigil>* \w > <.panic: "Invalid hard reference syntax">
2372         | <longname>
2373         ]
2374     }
2375
2376     token variable {
2377         :my $*IN_META = '';
2378         :my $sigil = '';
2379         :my $twigil = '';
2380         :my $name;
2381         <?before <sigil> {
2382             $sigil = $<sigil>.Str;
2383             $*LEFTSIGIL ||= $sigil;
2384         }> {}
2385         [
2386         || <sigil> <twigil>? <?before '::' [ '{' | '<' | '(' ]> <longname> # XXX
2387         || '&'
2388             [
2389             | <twigil>? <sublongname> { $name = $<sublongname>.Str }
2390             | :dba('infix noun') '[' ~ ']' <infixish('[]')>
2391             ]
2392         || '$::' <name>? # XXX
2393         || '$:' <name> # XXX
2394         || [
2395             | <sigil> <twigil>? <desigilname> { $name = $<desigilname>.Str }
2396             | <special_variable>
2397             | <sigil> <index=.decint>
2398             # Note: $() can also parse as contextualizer in an expression; should have same effect
2399             | <sigil> <?before '<' | '('> <postcircumfix>
2400             | <sigil> <?{ $*IN_DECL }>
2401             | <?> {{
2402                 if $*QSIGIL {
2403                     return ();
2404                 }
2405                 else {
2406                     $¢.sorry("Non-declarative sigil is missing its name");
2407                 }
2408               }}
2409             ]
2410         ]
2411
2412         { my $t = $<twigil>; $twigil = $t.[0].Str if @$t; }
2413         [ <?{ $twigil eq '.' }>
2414             [<.unsp> | '\\' | <?> ] <?before '('> <postcircumfix>
2415         ]?
2416     }
2417
2418
2419
2420     token deflongname {
2421         :dba('new name to be defined')
2422         <name>
2423         [
2424         | <colonpair>+ { $¢.add_categorical(substr($*ORIG, self.pos, $¢.pos - self.pos)) if $*IN_DECL; }
2425         | { $¢.add_routine($<name>.Str) if $*IN_DECL; }
2426         ]
2427     }
2428
2429     token subshortname {
2430         [
2431         | <category> <colonpair>+
2432         | <desigilname>
2433         ]
2434     }
2435
2436     token sublongname {
2437         <subshortname> <sigterm>?
2438     }
2439
2440     token value:quote   { <quote> }
2441     token value:number  { <number> }
2442     token value:version { <version> }
2443
2444     # Note: call this only to use existing type, not to declare type
2445     token typename {
2446         [
2447         | '::?'<identifier>                 # parse ::?CLASS as special case
2448         | <longname>
2449           <?{{
2450             my $longname = $<longname>.Str;
2451             if substr($longname, 0, 2) eq '::' {
2452                 $¢.add_my_name(substr($longname, 2));
2453             }
2454             else {
2455                 $¢.is_name($longname)
2456             }
2457           }}>
2458         ]
2459         # parametric type?
2460         <.unsp>? [ <?before '['> <param=.postcircumfix> ]?
2461         <.unsp>? [ <?before '{'> <whence=.postcircumfix> ]?
2462         [<.ws> 'of' <.ws> <typename> ]?
2463     }
2464
2465     # Note, does not include <1/2> forms, which are parsed as quotewords
2466
2467     token number {
2468         [
2469         | 'NaN' »
2470         | <integer>
2471         | <dec_number>
2472         | <rad_number>
2473         | 'Inf' »
2474         ]
2475     }
2476
2477     # <strtonum> is (we hope) used only by Str --> Num conversions
2478     #  (such as those done dwimmily by quotewords)
2479     token strtonum:rational { <[+\-]>?<nu=.integer>'/'<de=.integer> }
2480     token strtonum:complex { [<[+\-]>?<re=.number>]? <[+\-]><im=.number>'\\'?'i' }
2481     token strtonum:number { <[+\-]>?<number> }
2482
2483     ##########
2484     # Quotes #
2485     ##########
2486
2487     token sibble ($l, $lang2) {
2488         :my ($lang, $start, $stop);
2489         <babble($l)>
2490         { my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
2491
2492         $start <left=.nibble($lang)> [ $stop || <.panic: "Couldn't find terminator $stop"> ]
2493         [ <?{ $start ne $stop }>
2494             <.ws>
2495             [ <infixish> || <panic: "Missing assignment operator"> ]
2496             [ <?{ $<infixish>.Str eq '=' || $<infixish>.<infix_postfix_meta_operator> }> || <.panic: "Malformed assignment operator"> ]
2497             <.ws>
2498             <right=EXPR(item %item_assignment)>
2499         ||
2500             { $lang = $lang2.unbalanced($stop); }
2501             <right=.nibble($lang)> $stop || <.panic: "Malformed replacement part; couldn't find final $stop">
2502         ]
2503     }
2504
2505     token tribble ($l, $lang2 = $l) {
2506         :my ($lang, $start, $stop);
2507         :my $*CCSTATE = '';
2508         <babble($l)>
2509         { my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
2510
2511         $start <left=.nibble($lang)> [ $stop || <.panic: "Couldn't find terminator $stop"> ]
2512         { $*CCSTATE = ''; }
2513         [ <?{ $start ne $stop }>
2514             <.ws> <quibble($lang2)>
2515         ||
2516             { $lang = $lang2.unbalanced($stop); }
2517             <right=.nibble($lang)> $stop || <.panic: "Malformed replacement part; couldn't find final $stop">
2518         ]
2519     }
2520
2521     token quasiquibble ($l) {
2522         :temp %*LANG;
2523         :my ($lang, $start, $stop);
2524         :my $*QUASIMODO = 0; # :COMPILING sets true
2525         <babble($l)>
2526         {
2527             my $B = $<babble><B>;
2528             ($lang,$start,$stop) = @$B;
2529             %*LANG<MAIN> = $lang;
2530         }
2531
2532         [
2533         || <?{ $start eq '{' }> [ :lang($lang) <block> ]
2534         || [ :lang($lang) <starter> <statementlist> [ <stopper> || <.panic: "Couldn't find terminator $stop"> ] ]
2535         ]
2536     }
2537
2538     token quote:sym<//>   {
2539         '/'\s*'/' <.sorry: "Null regex not allowed">
2540     }
2541
2542     token quote:sym</ />   {
2543         '/' <nibble( $¢.cursor_fresh( %*LANG<Regex> ).unbalanced("/") )> [ '/' || <.panic: "Unable to parse regex; couldn't find final '/'"> ]
2544         <.old_rx_mods>?
2545     }
2546
2547     # handle composite forms like qww
2548     token quote:qq {
2549         :my $qm;
2550         'qq'
2551         [
2552         | <quote_mod> » <!before '('> { $qm = $<quote_mod>.Str } <.ws> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).tweak($qm => 1))>
2553         | » <!before '('> <.ws> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq))>
2554         ]
2555     }
2556     token quote:q {
2557         :my $qm;
2558         'q'
2559         [
2560         | <quote_mod> » <!before '('> { $qm = $<quote_mod>.Str } <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q).tweak($qm => 1))>
2561         | » <!before '('> <.ws> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q))>
2562         ]
2563     }
2564
2565     token quote:Q {
2566         :my $qm;
2567         'Q'
2568         [
2569         | <quote_mod> » <!before '('> { $qm = $<quote_mod>.Str } <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak($qm => 1))>
2570         | » <!before '('> <.ws> <quibble($¢.cursor_fresh( %*LANG<Q> ))>
2571         ]
2572     }
2573
2574     token quote_mod:w  { <sym> }
2575     token quote_mod:ww { <sym> }
2576     token quote_mod:p  { <sym> }
2577     token quote_mod:x  { <sym> }
2578     token quote_mod:to { <sym> }
2579     token quote_mod:s  { <sym> }
2580     token quote_mod:a  { <sym> }
2581     token quote_mod:h  { <sym> }
2582     token quote_mod:f  { <sym> }
2583     token quote_mod:c  { <sym> }
2584     token quote_mod:b  { <sym> }
2585
2586     token quote:rx {
2587         <sym> » <!before '('>
2588         <quibble( $¢.cursor_fresh( %*LANG<Regex> ) )>
2589         <!old_rx_mods>
2590     }
2591
2592     token quote:m  {
2593         <sym> » <!before '('>
2594         <quibble( $¢.cursor_fresh( %*LANG<Regex> ) )>
2595         <!old_rx_mods>
2596     }
2597
2598     token quote:mm {
2599         <sym> » <!before '('>
2600         <quibble( $¢.cursor_fresh( %*LANG<Regex> ).tweak(:s))>
2601         <!old_rx_mods>
2602     }
2603
2604     token quote:s {
2605         <sym> » <!before '('>
2606         <pat=.sibble( $¢.cursor_fresh( %*LANG<Regex> ), $¢.cursor_fresh( %*LANG<Q> ).tweak(:qq))>
2607         <!old_rx_mods>
2608     }
2609
2610     token quote:ss {
2611         <sym> » <!before '('>
2612         <pat=.sibble( $¢.cursor_fresh( %*LANG<Regex> ).tweak(:s), $¢.cursor_fresh( %*LANG<Q> ).tweak(:qq))>
2613         <!old_rx_mods>
2614     }
2615     token quote:tr {
2616         <sym> » <!before '('> <pat=.tribble( $¢.cursor_fresh( %*LANG<Q> ).tweak(:cc))>
2617         <!old_tr_mods>
2618     }
2619
2620     token quote:y {
2621         <sym> »
2622         # could be defined as a function or constant
2623         <!{ self.is_known('&y') or self.is_known('y') }>
2624         <!before '('> <?before \h*\W>
2625         <.obs('y///','tr///')>
2626     }
2627
2628     token old_rx_mods {
2629         <!after \s>
2630         (< i g s m x c e >+)
2631         {{
2632             given $0.Str {
2633                 $_ ~~ /i/ and $¢.worryobs('/i',':i');
2634                 $_ ~~ /g/ and $¢.worryobs('/g',':g');
2635                 $_ ~~ /m/ and $¢.worryobs('/m','^^ and $$ anchors');
2636                 $_ ~~ /s/ and $¢.worryobs('/s','. or \N');
2637                 $_ ~~ /x/ and $¢.worryobs('/x','normal default whitespace');
2638                 $_ ~~ /c/ and $¢.worryobs('/c',':c or :p');
2639                 $_ ~~ /e/ and $¢.worryobs('/e','interpolated {...} or s{} = ... form');
2640                 $¢.obs('suffix regex modifiers','prefix adverbs');
2641             }
2642         }}
2643     }
2644
2645     token old_tr_mods {
2646         (< c d s ] >+)
2647         {{
2648             given $0.Str {
2649                 $_ ~~ /c/ and $¢.worryobs('/c',':c');
2650                 $_ ~~ /d/ and $¢.worryobs('/g',':d');
2651                 $_ ~~ /s/ and $¢.worryobs('/s',':s');
2652                 $¢.obs('suffix transliteration modifiers','prefix adverbs');
2653             }
2654         }}
2655     }
2656
2657     token quote:quasi {
2658         <sym> » <!before '('> <quasiquibble($¢.cursor_fresh( %*LANG<Quasi> ))>
2659     }
2660
2661     ###########################
2662     # Captures and Signatures #
2663     ###########################
2664
2665     token capterm {
2666         '\\'
2667         [
2668         | '(' <capture>? ')'
2669         | <?before \S> <termish>
2670         | {} <.panic: "You can't backslash that">
2671         ]
2672     }
2673
2674     rule capture {
2675         :my $*INVOCANT_OK = 1;
2676         <EXPR>
2677     }
2678
2679     token sigterm {
2680         :dba('signature')
2681         ':(' ~ ')' <fakesignature>
2682     }
2683
2684     rule param_sep { [','|':'|';'|';;'] }
2685
2686     token fakesignature() {
2687         :temp $*CURLEX;
2688         :my $*DECLARAND;
2689         <.newlex>
2690         <signature>
2691     }
2692
2693     token signature ($lexsig = 0) {
2694         :my $*IN_DECL = 'sig';
2695         :my $*zone = 'posreq';
2696         :my $startpos = self.pos;
2697         :my $*MULTINESS = 'only';
2698         :my $*SIGNUM = $lexsig;
2699         <.ws>
2700         [
2701         | '\|' [ <param_var> || <.panic: "\\| signature must contain one variable"> ]
2702             <.ws> [ <?before '-->' | ')' | ']' > || <.panic: "\\| signature may contain only a variable"> ]
2703         |   [
2704             | <?before '-->' | ')' | ']' | '{' | ':'\s | ';;' >
2705             | [ <parameter> || <.panic: "Malformed parameter"> ]
2706             ] ** <param_sep>
2707         ]
2708         <.ws>
2709         { $*IN_DECL = ''; }
2710         [ '-->' <.ws> [<type_constraint> || <.panic: "No type found after -->">] <.ws> ]?
2711         {{
2712             $*LEFTSIGIL = '@';
2713             if $lexsig {
2714                 $*CURLEX.<$?SIGNATURE> ~= '|' if $lexsig > 1;
2715                 $*CURLEX.<$?SIGNATURE> ~= '(' ~ substr($*ORIG, $startpos, $¢.pos - $startpos) ~ ')';
2716                 $*CURLEX.<!NEEDSIG>:delete;
2717             }
2718         }}
2719     }
2720
2721     token type_declarator:subset {
2722         :my $*IN_DECL = 'subset';
2723         :my $*DECLARAND;
2724         <sym> :s
2725         [
2726             [ <longname> { $¢.add_name($<longname>[0].Str); } ]?
2727             <trait>*
2728             [where <EXPR(item %chaining)> ]?    # (EXPR can parse multiple where clauses)
2729         ] || <.panic: "Malformed subset">
2730     }
2731
2732     token type_declarator:enum {
2733         :my $*IN_DECL = 'enum';
2734         :my $*DECLARAND;
2735         <sym> <.ws>
2736         [
2737         | <name=longname> { $¢.add_name($<name>.Str); }
2738         | <name=variable> { $¢.add_variable($<name>.Str); }
2739         | <?>
2740         ]
2741         { $*IN_DECL = ''; }
2742         <.ws>
2743         <trait>* <?before <[ < ( « ]> > <term> <.ws>
2744             {$¢.add_enum($<name>, $<term>.Str); }
2745     }
2746
2747     token type_declarator:constant {
2748         :my $*IN_DECL = 'constant';
2749         :my $*DECLARAND;
2750         <sym> <.ws>
2751
2752         [
2753         | <identifier> { $¢.add_name($<identifier>.Str); }
2754         | <variable> { $¢.add_variable($<variable>.Str); }
2755         | <?>
2756         ]
2757         { $*IN_DECL = ''; }
2758         <.ws>
2759
2760         <trait>*
2761
2762         [
2763         || <?before '='>
2764         || <?before <-[\n=]>*'='> <.panic: "Malformed constant"> # probable initializer later
2765         || <.sorry: "Missing initializer on constant declaration">
2766         ]
2767
2768         <.getdecl>
2769     }
2770
2771
2772     token type_constraint {
2773         :my $*IN_DECL = '';
2774         [
2775         | <value>
2776         | <typename>
2777             [ <?{ $*DECLARAND<of> }> <.sorry("Extra 'of' type; already declared as type " ~ $*DECLARAND<of>.Str)> ]?
2778             { $*DECLARAND<of> = $<typename>; }
2779         | where <.ws> <EXPR(item %chaining)>
2780         ]
2781         <.ws>
2782     }
2783
2784     rule post_constraint {
2785         :my $*IN_DECL = '';
2786         :dba('constraint')
2787         [
2788         | '[' ~ ']' <signature>
2789         | '(' ~ ')' <signature>
2790         | where <EXPR(item %chaining)>
2791         ]
2792     }
2793
2794     token named_param {
2795         :my $*GOAL ::= ')';
2796         ':'
2797         [
2798         | <name=.identifier> '(' <.ws>
2799             [ <named_param> | <param_var> <.ws> ]
2800             [ ')' || <.panic: "Unable to parse named parameter; couldn't find right parenthesis"> ]
2801         | <param_var(1)>
2802         ]
2803     }
2804
2805     token param_var($named = 0) {
2806         :dba('formal parameter')
2807         [
2808         | '[' ~ ']' <signature>
2809         | '(' ~ ')' <signature>
2810         | <sigil> <twigil>?
2811             [
2812                 # Is it a longname declaration?
2813             || <?{ $<sigil>.Str eq '&' }> <?ident> {}
2814                 <name=.sublongname>
2815
2816             ||  # Is it a shaped array or hash declaration?
2817                 <?{ $<sigil>.Str eq '@' || $<sigil>.Str eq '%' }>
2818                 <name=.identifier>?
2819                 <?before <[ \< \( \[ \{ ]> >
2820                 <postcircumfix>
2821
2822                 # ordinary parameter name
2823             || <name=.identifier>
2824             || $<name> = [<[/!]>]
2825
2826                 # bare sigil?
2827             ]?
2828             {{
2829                 my $vname = $<sigil>.Str;
2830                 my $t = $<twigil>;
2831                 my $twigil = '';
2832                 $twigil = $t.[0].Str if @$t;
2833                 $vname ~= $twigil;
2834                 my $n = try { $<name>[0].Str } // '';
2835                 $vname ~= $n;
2836                 given $twigil {
2837                     when '' {
2838                         self.add_my_name($vname) if $n ne '';
2839                         # :$param is often used as a multi matcher without $param used in body
2840                         #   so don't count as "declared but not used"
2841                         $*CURLEX{$vname}<used> = 1 if $named and $n;
2842                     }
2843                     when '.' {
2844                     }
2845                     when '!' {
2846                     }
2847                     when '*' {
2848                     }
2849                     default {
2850                         self.panic("You may not use the $twigil twigil in a signature");
2851                     }
2852                 }
2853             }}
2854         ]
2855     }
2856
2857     token parameter {
2858         :my $kind;
2859         :my $quant = '';
2860         :my $*DECLARAND;
2861         :my $*OFTYPE;
2862
2863         [
2864         | <type_constraint>+
2865             {{
2866                 my $t = $<type_constraint>;
2867                 my @t = grep { substr($_.Str,0,2) ne '::' }, @$t;
2868                 @t > 1 and $¢.sorry("Multiple prefix constraints not yet supported")
2869             }}
2870             [
2871             | '**' <param_var>   { $quant = '**'; $kind = '*'; }
2872             | '*' <param_var>   { $quant = '*'; $kind = '*'; }
2873             | '|' <param_var>   { $quant = '|'; $kind = '*'; }
2874             | '\\' <param_var>  { $quant = '\\'; $kind = '!'; }
2875             |   [
2876                 | <param_var>   { $quant = ''; $kind = '!'; }
2877                 | <named_param> { $quant = ''; $kind = '*'; }
2878                 ]
2879                 [
2880                 | '?'           { $quant = '?'; $kind = '?' }
2881                 | '!'           { $quant = '!'; $kind //= '!' }
2882                 | <?>
2883                 ]
2884             | <?> { $quant = ''; $kind = '!' }
2885             ]
2886
2887         | '**' <param_var>   { $quant = '**'; $kind = '*'; }
2888         | '*' <param_var>   { $quant = '*'; $kind = '*'; }
2889         | '|' <param_var>   { $quant = '|'; $kind = '*'; }
2890         | '\\' <param_var>  { $quant = '\\'; $kind = '!'; }
2891         |   [
2892             | <param_var>   { $quant = ''; $kind = '!'; }
2893             | <named_param> { $quant = ''; $kind = '*'; }
2894             ]
2895             [
2896             | '?'           { $quant = '?'; $kind = '?' }
2897             | '!'           { $quant = '!'; $kind //= '!' }
2898             | <?>
2899             ]
2900         | {} <longname> <.panic("In parameter declaration, typename '" ~ $<longname>.Str ~ "' must be predeclared (or marked as declarative with :: prefix)")>
2901         ]
2902
2903         <trait>*
2904
2905         <post_constraint>*
2906
2907         <.getdecl>
2908
2909         [
2910             <default_value> {{
2911                 given $quant {
2912                   when '!' { $¢.sorry("Can't put a default on a required parameter") }
2913                   when '*' { $¢.sorry("Can't put a default on a slurpy parameter") }
2914                   when '**' { $¢.sorry("Can't put a default on a slice parameter") }
2915                   when '|' { $¢.sorry("Can't put a default on an slurpy capture parameter") }
2916                   when '\\' { $¢.sorry("Can't put a default on a capture parameter") }
2917                 }
2918                 $kind = '?' if $kind eq '!';
2919             }}
2920             [<?before ':' > <.sorry: "Can't put a default on the invocant parameter">]?
2921             [<!before <[,;)\]\{\-]> > <.sorry: "Default expression must come last">]?
2922         ]?
2923         [<?before ':'> <?{ $kind ne '!' }> <.sorry: "Invocant is too exotic">]?
2924
2925         {
2926             $<quant> = $quant;
2927             $<kind> = $kind;
2928         }
2929
2930         # enforce zone constraints
2931         {{
2932             given $kind {
2933                 when '!' {
2934                     given $*zone {
2935                         when 'posopt' {
2936     $¢.sorry("Can't put required parameter after optional parameters");
2937                         }
2938                         when 'var' {
2939     $¢.sorry("Can't put required parameter after variadic parameters");
2940                         }
2941                     }
2942                 }
2943                 when '?' {
2944                     given $*zone {
2945                         when 'posreq' { $*zone = 'posopt' }
2946                         when 'var' {
2947     $¢.sorry("Can't put optional positional parameter after variadic parameters");
2948                         }
2949                     }
2950                 }
2951                 when '*' {
2952                     $*zone = 'var';
2953                 }
2954             }
2955         }}
2956     }
2957
2958     rule default_value {
2959         :my $*IN_DECL = '';
2960         '=' <EXPR(item %item_assignment)>
2961     }
2962
2963     token statement_prefix:sink    { <sym> <blast> }
2964     token statement_prefix:try     { <sym> <blast> }
2965     token statement_prefix:quietly { <sym> <blast> }
2966     token statement_prefix:gather  { <sym> <blast> }
2967     token statement_prefix:contend { <sym> <blast> }
2968     token statement_prefix:async   { <sym> <blast> }
2969     token statement_prefix:maybe   { <sym> <blast> }
2970     token statement_prefix:lazy    { <sym> <blast> }
2971     token statement_prefix:do      { <sym> <blast> }
2972
2973     token statement_prefix:lift    {
2974         :my $*QUASIMODO = 1;
2975         <sym> <blast>
2976     }
2977
2978     # accepts blocks and statements
2979     token blast {
2980         <?before \s> <.ws>
2981         [
2982         | <block>
2983         | <statement>  # creates a dynamic scope but not lexical scope
2984         ]
2985     }
2986
2987     #########
2988     # Terms #
2989     #########
2990
2991     token term:new {
2992         'new' \h+ <longname> \h* <!before ':'> <.obs("C++ constructor syntax", "method call syntax")>
2993     }
2994
2995     token term:sym<::?IDENT> {
2996         $<sym> = [ '::?' <identifier> ] »
2997         <O(|%term)>
2998     }
2999
3000     token term:sym<Object> {
3001         <sym> » {}
3002         <.obs('Object', 'Mu as the "most universal" object type')>
3003     }
3004
3005     token term:sym<undef> {
3006         <sym> » {}
3007         [ <?before \h*'$/' >
3008             <.obs('$/ variable as input record separator',
3009                  "the filehandle's .slurp method")>
3010         ]?
3011         [ <?before [ '(' || \h*<sigil><twigil>?\w ] >
3012             <.obs('undef as a verb', 'undefine function or assignment of Nil')>
3013         ]?
3014         <.obs('undef as a value', "something more specific:\n\tMu (the \"most undefined\" type object),\n\tan undefined type object such as Int,\n\tNil as an empty list,\n\t:!defined as a matcher,\n\tAny:U as a type constraint\n\tor fail() as a failure return\n\t   ")>
3015     }
3016
3017     token term:sym<proceed>
3018         { <sym> » <O(|%term)> }
3019
3020     token term:sym<time>
3021         { <sym> » <O(|%term)> }
3022
3023     token term:sym<now>
3024         { <sym> » <O(|%term)> }
3025
3026     token term:sym<self>
3027         { <sym> » <O(|%term)> }
3028
3029     token term:sym<defer>
3030         { <sym> » <O(|%term)> }
3031
3032     token term:rand {
3033         <sym> »
3034         [ <?before '('? \h* [\d|'$']> <.obs('rand(N)', 'N.rand or (1..N).pick')> ]?
3035         [ <?before '()'> <.obs('rand()', 'rand')> ]?
3036         <O(|%term)>
3037     }
3038
3039     token term:sym<*>
3040         { <sym> <O(|%term)> }
3041
3042     token term:sym<**>
3043         { <sym> <O(|%term)> }
3044
3045     token infix:lambda {
3046         <?before '{' | '->' > <!{ $*IN_META }> {{
3047             my $needparens = 0;
3048             my $line = $¢.lineof($¢.pos);
3049             for 'if', 'unless', 'while', 'until', 'for', 'given', 'when', 'loop', 'sub', 'method' {
3050                 $needparens++ if $_ eq 'loop';
3051                 my $m = %*MYSTERY{$_};
3052                 next unless $m;
3053                 if $line - ($m.<line>//-123) < 5 {
3054                     if $m.<ctx> eq '(' {
3055                         $¢.panic("Word '$_' interpreted as '$_" ~ "()' function call; please use whitespace " ~
3056                         ($needparens ?? 'around the parens' !! 'instead of parens') ~ $m<token>.locmess ~
3057                         "\nUnexpected block in infix position (two terms in a row)");
3058                     }
3059                     else {
3060                         $¢.panic("Word '$_' interpreted as a listop; please use 'do $_' to introduce the statement control word" ~ $m<token>.cursor($m<token>.from).locmess ~
3061                         "\nUnexpected block in infix position (two terms in a row)");
3062                     }
3063                 }
3064             }
3065             return () if $*IN_REDUCE;
3066             my $endpos = $¢.pos;
3067             my $startpos = @*MEMOS[$endpos]<ws> // $endpos;
3068
3069             if self.lineof($startpos) != self.lineof($endpos) {
3070                 $¢.panic("Unexpected block in infix position (previous line missing its semicolon?)");
3071             }
3072             elsif @*MEMOS[$¢.pos-1]<baremeth> {
3073                 $¢.panic("Unexpected block in infix position (method call needs colon or parens to take arguments)");
3074             }
3075             else {
3076                 $¢.panic("Unexpected block in infix position (two terms in a row, or previous statement missing semicolon?)");
3077             }
3078         }}
3079         <O(|%term)>
3080     }
3081
3082     token circumfix:sigil
3083         { :dba('contextualizer') <sigil> '(' ~ ')' <semilist> { $*LEFTSIGIL ||= $<sigil>.Str } <O(|%term)> }
3084
3085     token circumfix:sym<( )>
3086         { :dba('parenthesized expression') '(' ~ ')' <semilist> <O(|%term)> }
3087
3088     token circumfix:sym<[ ]>
3089         { :dba('array composer') '[' ~ ']' <semilist> <O(|%term)> { @*MEMOS[$¢.pos]<arraycomp> = 1; } }
3090
3091     #############
3092     # Operators #
3093     #############
3094
3095     token PRE {
3096         :dba('prefix or meta-prefix')
3097         [
3098         | <prefix>
3099             { $<O> = $<prefix><O>; $<sym> = $<prefix><sym> }
3100         | <prefix_circumfix_meta_operator>
3101             { $<O> = $<prefix_circumfix_meta_operator><O>; $<sym> = $<prefix_circumfix_meta_operator>.Str }
3102         ]
3103         # XXX assuming no precedence change
3104
3105         <prefix_postfix_meta_operator>*
3106         <.ws>
3107     }
3108
3109     token infixish ($in_meta = $*IN_META) {
3110         :my $infix;
3111         :my $*IN_META = $in_meta;
3112         <!stdstopper>
3113         <!infixstopper>
3114         :dba('infix or meta-infix')
3115         [
3116         | <colonpair> {
3117                 $<fake> = 1;
3118                 $<sym> = ':';
3119                 %<O><prec> = %item_assignment<prec>;  # actual test is non-inclusive!
3120                 %<O><assoc> = 'unary';
3121                 %<O><dba> = 'adverb';
3122             }
3123         |   [
3124             | :dba('bracketed infix') '[' ~ ']' <infix=.infixish('[]')> { $<O> = $<infix><O>; $<sym> = $<infix><sym>; }
3125             | <infix=infix_circumfix_meta_operator> { $<O> = $<infix><O>; $<sym> = $<infix><sym>; }
3126             | <infix=infix_prefix_meta_operator>    { $<O> = $<infix><O>; $<sym> = $<infix><sym>; }
3127             | <infix>                               { $<O> = $<infix><O>; $<sym> = $<infix><sym>; }
3128             | {} <?dotty> <.panic: "Method call found where infix expected (omit whitespace?)">
3129             | {} <?postfix> <.panic: "Postfix found where infix expected (omit whitespace?)">
3130             ]
3131             [ <?before '='> <?{ $infix = $<infix>; }> <infix_postfix_meta_operator($infix)>
3132                    { $<O> = $<infix_postfix_meta_operator>[0]<O>; $<sym> = $<infix_postfix_meta_operator>[0]<sym>; }
3133             ]?
3134
3135         ]
3136     }
3137
3138     # NOTE: Do not add dotty ops beginning with anything other than dot!
3139     #   Dotty ops have to parse as .foo terms as well, and almost anything
3140     #   other than dot will conflict with some other prefix.
3141
3142     # doing fancy as one rule simplifies LTM
3143     token dotty:sym<.*> {
3144         ('.' [ <[+*?=]> | '^' '!'? ]) :: <.unspacey> <dottyop>
3145         { $<sym> = $0.Str; }
3146         <O(%methodcall)>
3147     }
3148
3149     token dotty:sym<.> {
3150         <sym> <dottyop>
3151         <O(%methodcall)>
3152     }
3153
3154     token privop {
3155         '!' <methodop>
3156         <O(%methodcall)>
3157     }
3158
3159     token dottyopish {
3160         <term=.dottyop>
3161     }
3162
3163     token dottyop {
3164         :dba('dotty method or postfix')
3165         [
3166         | <methodop>
3167         | <colonpair>
3168         | <!alpha> <postop> { $<O> = $<postop><O>; $<sym> = $<postop><sym>; }  # only non-alpha postfixes have dotty form
3169         ]
3170     }
3171
3172     # Note, this rule mustn't do anything irreversible because it's used
3173     # as a lookahead by the quote interpolator.
3174
3175     token POST {
3176         <!stdstopper>
3177
3178         # last whitespace didn't end here
3179         <!{ @*MEMOS[$¢.pos]<ws> }>
3180
3181         [ <.unsp> | '\\' ]?
3182
3183         [ ['.' <.unsp>?]? <postfix_prefix_meta_operator> <.unsp>? ]*
3184
3185         :dba('postfix')
3186         [
3187         | <dotty>  { $<O> = $<dotty><O>;  $<sym> = $<dotty><sym>;  $<~CAPS> = $<dotty><~CAPS>; }
3188         | <privop> { $<O> = $<privop><O>; $<sym> = $<privop><sym>; $<~CAPS> = $<privop><~CAPS>; }
3189         | <postop> { $<O> = $<postop><O>; $<sym> = $<postop><sym>; $<~CAPS> = $<postop><~CAPS>; }
3190         ]
3191         { $*LEFTSIGIL = '@'; }
3192     }
3193
3194     method can_meta ($op, $meta) {
3195         !$op<O><fiddly> ||
3196             self.sorry("Can't " ~ $meta ~ " " ~ $op<sym> ~ " because " ~ $op<O><dba> ~ " operators are too fiddly");
3197         self;
3198     }
3199
3200     regex prefix_circumfix_meta_operator:reduce {
3201         :my $*IN_REDUCE = 1;
3202         :my $op;
3203         <?before '['\S+']'>
3204         $<s> = (
3205             '['
3206             [
3207             || <op=.infixish('red')> <?before ']'>
3208             || \\<op=.infixish('tri')> <?before ']'>
3209             || <!>
3210             ]
3211             ']' ['«'|<?>]
3212         )
3213         { $op = $<s><op>; }
3214
3215         <.can_meta($op, "reduce with")>
3216
3217         [
3218         || <!{ $op<O><diffy> }>
3219         || <?{ $op<O><assoc> eq 'chain' }>
3220         || <.sorry("Can't reduce with " ~ $op<sym> ~ " because " ~ $op<O><dba> ~ " operators are diffy and not chaining")>
3221         ]
3222
3223         <O($op.Opairs, |%list_prefix, assoc => 'unary', uassoc => 'left')>
3224         { $<sym> = $<s>.Str; }
3225
3226         [ <?before '('> || <?before \s+ [ <?stdstopper> { $<O><term> = 1 } ]? > || { $<O><term> = 1 } ]
3227     }
3228
3229     token prefix_postfix_meta_operator:sym< « >    { <sym> | '<<' }
3230
3231     token postfix_prefix_meta_operator:sym< » >    {
3232         [ <sym> | '>>' ]
3233         # require >>.( on interpolated hypercall so infix:«$s»($a,$b) {...} dwims
3234         [<!{ $*QSIGIL }> || <!before '('> ]
3235     }
3236
3237     token infix_prefix_meta_operator:sym<!> {
3238         <sym> <!before '!'> {} [ <infixish('neg')> || <.panic: "Negation metaoperator not followed by valid infix"> ]
3239
3240         [
3241         || <?{ $<infixish>.Str eq '=' }>
3242            <O(|%chaining)>
3243
3244         || <.can_meta($<infixish>, "negate")>
3245            <?{ $<infixish><O><iffy> }>
3246            <?{ $<O> = $<infixish><O>; }>
3247
3248         || <.panic("Can't negate " ~ $<infixish>.Str ~ " because " ~ $<infixish><O><dba> ~ " operators are not iffy enough")>
3249         ]
3250     }
3251
3252     token infix_prefix_meta_operator:sym<R> {
3253         <sym> {} <infixish('R')>
3254         <.can_meta($<infixish>, "reverse the args of")>
3255         <?{ $<O> = $<infixish><O>; }>
3256     }
3257
3258     token infix_prefix_meta_operator:sym<S> {
3259         <sym> {} <infixish('S')>
3260         <.can_meta($<infixish>, "sequence the args of")>
3261         <?{ $<O> = $<infixish><O>; }>
3262     }
3263
3264     token infix_prefix_meta_operator:sym<X> {
3265         <sym> <?before \S> {}
3266         [ <infixish('X')>
3267             <.can_meta($<infixish>[0], "cross with")>
3268             <?{ $<O> = $<infixish>[0]<O>; $<O><prec>:delete; $<sym> ~= $<infixish>[0].Str }>
3269         ]?
3270         <O(|%list_infix, self.Opairs)>
3271     }
3272
3273     token infix_prefix_meta_operator:sym<Z> {
3274         <sym> <?before \S> {}
3275         [ <infixish('Z')>
3276             <.can_meta($<infixish>[0], "zip with")>
3277             <?{ $<O> = $<infixish>[0]<O>; $<O><prec>:delete; $<sym> ~= $<infixish>[0].Str }>
3278         ]?
3279         <O(|%list_infix, self.Opairs)>
3280     }
3281
3282     token infix_circumfix_meta_operator:sym<« »> {
3283         [
3284         | '«'
3285         | '»'
3286         ]
3287         {} <infixish('hyper')> [ '«' | '»' || <.panic: "Missing « or »"> ]
3288         <.can_meta($<infixish>, "hyper with")>
3289         <?{ $<O> := $<infixish><O>; }>
3290     }
3291
3292     token infix_circumfix_meta_operator:sym«<< >>» {
3293         [
3294         | '<<'
3295         | '>>'
3296         ]
3297         {} <infixish('HYPER')> [ '<<' | '>>' || <.panic("Missing << or >>")> ]
3298         <.can_meta($<infixish>, "hyper with")>
3299         <?{ $<O> := $<infixish><O>; }>
3300     }
3301
3302     token infix_postfix_meta_operator:sym<=> ($op) {
3303         '='
3304         <.can_meta($op, "make assignment out of")>
3305         [ <!{ $op<O><diffy> }> || <.sorry("Can't make assignment out of " ~ $op<sym> ~ " because " ~ $op<O><dba> ~ " operators are diffy")> ]
3306         { $<sym> = $op<sym> ~ '='; }
3307         <O(|%item_assignment, $op.Opairs, dba => 'item assignment', iffy => 0)>
3308     }
3309
3310     token postcircumfix:sym<( )>
3311         { :dba('argument list') '(' ~ ')' <semiarglist> <O(|%methodcall)> }
3312
3313     token postcircumfix:sym<[ ]>
3314         { :dba('subscript') '[' ~ ']' <semilist> { $<semilist>.Str ~~ /^\s*\-1\s*$/ and $¢.obs("[-1] subscript to access final element","[*-1]") } <O(|%methodcall)> }
3315
3316     token postcircumfix:sym<{ }>
3317         { :dba('subscript') '{' ~ '}' <semilist> <O(|%methodcall)> <.curlycheck> }
3318
3319     token postcircumfix:sym«< >» {
3320         :my $pos;
3321         '<'
3322         { $pos = $¢.pos }
3323         [
3324         || <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q).tweak(:w).balanced('<','>'))> '>'
3325         || <?before \h* [ \d | <sigil> | ':' ] >
3326            { $¢.cursor_force($pos).panic("Whitespace required before < operator") }
3327         || { $¢.cursor_force($pos).panic("Unable to parse quote-words subscript; couldn't find right angle quote") }
3328         ]
3329         <O(|%methodcall)>
3330     }
3331
3332     token postcircumfix:sym«<< >>»
3333         { '<<' <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).tweak(:ww).balanced('<<','>>'))> [ '>>' || <.panic: "Unable to parse quote-words subscript; couldn't find right double-angle quote"> ] <O(|%methodcall)> }
3334
3335     token postcircumfix:sym<« »>
3336         { '«' <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).tweak(:ww).balanced('«','»'))> [ '»' || <.panic: "Unable to parse quote-words subscript; couldn't find right double-angle quote"> ] <O(|%methodcall)> }
3337
3338     token postop {
3339         | <postfix>         { $<O> := $<postfix><O>; $<sym> := $<postfix><sym>; }
3340         | <postcircumfix>   { $<O> := $<postcircumfix><O>; $<sym> := $<postcircumfix><sym>; }
3341     }
3342
3343     token methodop {
3344         [
3345         | <longname>
3346         | <?before '$' | '@' | '&' > <variable> { $*VAR = $<variable> }
3347         | <?before <[ ' " ]> >
3348             [ <!{$*QSIGIL}> || <!before '"' <-["]>*? \s > ] # dwim on "$foo."
3349             <quote>
3350             [ <?before '(' | '.(' | '\\'> || <.obs('. to concatenate strings or to call a quoted method', '~ to concatenate, or if you meant to call a quoted method, please supply the required parentheses')> ]
3351             { my $t = $<quote><nibble>.Str; $t ~~ /\W/ or $t ~~ /^(WHO|WHAT|WHERE|WHEN|WHY|HOW)$/ or $¢.worry("Useless use of quotes") }
3352         ] <.unsp>?
3353
3354         :dba('method arguments')
3355         [
3356         | ':' <?before \s | '{'> <!{ $*QSIGIL }> <arglist>
3357         | <?[\\(]> <args>
3358         | { @*MEMOS[$¢.pos]<baremeth> = 1 }
3359         ]?
3360     }
3361
3362     token semiarglist {
3363         <arglist> ** ';'
3364         <.ws>
3365     }
3366
3367     token arglist {
3368         :my $inv_ok = $*INVOCANT_OK;
3369         :my StrPos $*endargs = 0;
3370         :my $*GOAL ::= 'endargs';
3371         :my $*QSIGIL ::= '';
3372         <.ws>
3373         :dba('argument list')
3374         [
3375         | <?stdstopper>
3376         | <EXPR(item %list_prefix)> {{
3377                 my $delims = $<EXPR><delims>;
3378                 for @$delims {
3379                     if $_.<infix><wascolon> // '' {
3380                         if $inv_ok {
3381                             $*INVOCANT_IS = $<EXPR><list>[0];
3382                         }
3383                     }
3384                 }
3385             }}
3386         ]
3387     }
3388
3389     token term:lambda {
3390         <?before <.lambda> >
3391         <pblock>
3392         {{
3393             if $*BORG {
3394                 $*BORG.<block> = $<pblock>;
3395             }
3396         }}
3397         <O(|%term)>
3398     }
3399
3400     token circumfix:sym<{ }> {
3401         <?before '{' >
3402         <pblock>
3403         {{
3404             if $*BORG {
3405                 $*BORG.<block> = $<pblock>;
3406             }
3407         }}
3408         <O(|%term)>
3409     }
3410
3411     ## methodcall
3412
3413     token postfix:sym<i>
3414         { <sym> » <O(|%methodcall)> }
3415
3416     token infix:sym<.> ()
3417         { '.' <[\]\)\},:\s\$"']> <.obs('. to concatenate strings', '~')> }
3418
3419     token postfix:sym['->'] () {
3420         '->'
3421         [
3422         | <brack=[ \[ \{ \( ]> <.obs("'->" ~ $<brack>.Str ~ "' as postfix dereferencer", "'." ~ $<brack>.Str ~ "' or just '" ~ $<brack>.Str ~ "' to deref, or whitespace to delimit a pointy block")>
3423         | <.obs('-> as postfix', 'either . to call a method, or whitespace to delimit a pointy block')>
3424         ]
3425     }
3426
3427     ## autoincrement
3428     token postfix:sym<++>
3429         { <sym> <O(|%autoincrement)> }
3430
3431     token postfix:sym«--» ()
3432         { <sym> <O(|%autoincrement)> }
3433
3434     token prefix:sym<++>
3435         { <sym> <O(|%autoincrement)> }
3436
3437     token prefix:sym«--» ()
3438         { <sym> <O(|%autoincrement)> }
3439
3440     ## exponentiation
3441     token infix:sym<**>
3442         { <sym> <O(|%exponentiation)> }
3443
3444     ## symbolic unary
3445     token prefix:sym<!>
3446         { <sym> <O(|%symbolic_unary)> }
3447
3448     token prefix:sym<+>
3449         { <sym> <O(|%symbolic_unary)> }
3450
3451     token prefix:sym<->
3452         { <sym> <O(|%symbolic_unary)> }
3453
3454     token prefix:sym<~~>
3455         { <sym> <.dupprefix('~~')> <O(|%symbolic_unary)> }
3456
3457     token prefix:sym<~>
3458         { <sym> <O(|%symbolic_unary)> }
3459
3460     token prefix:sym<??>
3461         { <sym> <.dupprefix('??')> <O(|%symbolic_unary)> }
3462
3463     token prefix:sym<?>
3464         { <sym> <O(|%symbolic_unary)> }
3465
3466     token prefix:sym<~^>
3467         { <sym> <O(|%symbolic_unary)> }
3468
3469     token prefix:sym<+^>
3470         { <sym> <O(|%symbolic_unary)> }
3471
3472     token prefix:sym<?^>
3473         { <sym> <O(|%symbolic_unary)> }
3474
3475     token prefix:sym<^^>
3476         { <sym> <.dupprefix('^^')> <O(|%symbolic_unary)> }
3477
3478     token prefix:sym<^>
3479         { <sym> <O(|%symbolic_unary)> }
3480
3481     token prefix:sym<||>
3482         { <sym> <O(|%symbolic_unary)> }
3483
3484     token prefix:sym<|>
3485         { <sym> <O(|%symbolic_unary)> }
3486
3487
3488     ## multiplicative
3489     token infix:sym<*>
3490         { <sym> <O(|%multiplicative)> }
3491
3492     token infix:sym</>
3493         { <sym> <O(|%multiplicative)> }
3494
3495     token infix:sym<div>
3496         { <sym> <O(|%multiplicative)> }
3497
3498     token infix:sym<%>
3499         { <sym> <O(|%multiplicative)> }
3500
3501     token infix:sym<%%>
3502         { <sym> <O(|%multiplicative, iffy => 1)> }      # "is divisible by" returns Bool
3503
3504     token infix:sym<mod>
3505         { <sym> <O(|%multiplicative)> }
3506
3507     token infix:sym<+&>
3508         { <sym> <O(|%multiplicative)> }
3509
3510     token infix:sym« << »
3511         { <sym> <!{ $*IN_META }> <?before \s> <.sorryobs('<< to do left shift', '+< or ~<')> <O(|%multiplicative)> }
3512
3513     token infix:sym« >> »
3514         { <sym> <!{ $*IN_META }> <?before \s> <.sorryobs('>> to do right shift', '+> or ~>')> <O(|%multiplicative)> }
3515
3516     token infix:sym<~&>
3517         { <sym> <O(|%multiplicative)> }
3518
3519     token infix:sym<?&>
3520         { <sym> <O(|%multiplicative, iffy => 1)> }
3521
3522     # try to allow both of >>op<< and >>op<<< without allowing op<<
3523     token infix:sym« ~< »
3524         { <sym> [ <!{ $*IN_META }> || <?before '<<'> || <!before '<'> ] <O(|%multiplicative)> }
3525
3526     token infix:sym« ~> »
3527         { <sym> [ <!{ $*IN_META }> || <?before '>>'> || <!before '>'> ] <O(|%multiplicative)> }
3528
3529     token infix:sym« +< »
3530         { <sym> [ <!{ $*IN_META }> || <?before '<<'> || <!before '<'> ] <O(|%multiplicative)> }
3531
3532     token infix:sym« +> »
3533         { <sym> [ <!{ $*IN_META }> || <?before '>>'> || <!before '>'> ] <O(|%multiplicative)> }
3534
3535     ## additive
3536     token infix:sym<+>
3537         { <sym> <!before '+'> <O(|%additive)> }
3538
3539     token infix:sym<->
3540         { <sym> <!before '-'> <O(|%additive)> }
3541
3542     token infix:sym<+|>
3543         { <sym> <O(|%additive)> }
3544
3545     token infix:sym<+^>
3546         { <sym> <O(|%additive)> }
3547
3548     token infix:sym<~|>
3549         { <sym> <O(|%additive)> }
3550
3551     token infix:sym<~^>
3552         { <sym> <O(|%additive)> }
3553
3554     token infix:sym<?|>
3555         { <sym> <O(|%additive, iffy => 1)> }
3556
3557     token infix:sym<?^>
3558         { <sym> <O(|%additive)> }
3559
3560     ## replication
3561     # Note: no word boundary check after x, relies on longest token for x2 xx2 etc
3562     token infix:sym<x>
3563         { <sym> <O(|%replication)> }
3564
3565     token infix:sym<xx>
3566         { <sym> <O(|%replication)> }
3567
3568     ## concatenation
3569     token infix:sym<~>
3570         { <sym> <O(|%concatenation)> }
3571
3572
3573     ## junctive and (all)
3574     token infix:sym<&>
3575         { <sym> <O(|%junctive_and, iffy => 1)> }
3576
3577
3578     ## junctive or (any)
3579     token infix:sym<|>
3580         { <sym> <O(|%junctive_or, iffy => 1)> }
3581
3582     token infix:sym<^>
3583         { <sym> <O(|%junctive_or, iffy => 1)> }
3584
3585
3586     ## named unary examples
3587     # (need \s* to win LTM battle with listops)
3588     token prefix:sleep
3589         { <sym> » <?before \s*> <O(|%named_unary)> }
3590
3591     token prefix:abs
3592         { <sym> » <?before \s*> <O(|%named_unary)> }
3593
3594     token prefix:let
3595         { <sym> » <?before \s*> <O(|%named_unary)> }
3596
3597     token prefix:temp
3598         { <sym> » <?before \s*> <O(|%named_unary)> }
3599
3600
3601     ## structural infix
3602     token infix:sym« <=> »
3603         { <sym> <O(|%structural, returns => 'Order')> }
3604
3605     token infix:cmp
3606         { <sym> <O(|%structural, returns => 'Order')> }
3607
3608     token infix:leg
3609         { <sym> <O(|%structural, returns => 'Order')> }
3610
3611     token infix:but
3612         { <sym> <O(|%structural)> }
3613
3614     token infix:does
3615         { <sym> <O(|%structural)> }
3616
3617     token infix:sym<..>
3618         { <sym> [<!{ $*IN_META }> <?before ')' | ']'> <.panic: "Please use ..* for indefinite range">]? <O(|%structural)> }
3619
3620     token infix:sym<^..>
3621         { <sym> <O(|%structural)> }
3622
3623     token infix:sym<..^>
3624         { <sym> <O(|%structural)> }
3625
3626     token infix:sym<^..^>
3627         { <sym> <O(|%structural)> }
3628
3629
3630     ## chaining binary
3631     token infix:sym<==>
3632         { <sym> <!before '=' > <O(|%chaining)> }
3633
3634     token infix:sym<!=>
3635         { <sym> <?before \s> <O(|%chaining)> }
3636
3637     token infix:sym« < »
3638         { <sym> <!before '<'> <O(|%chaining)> }
3639
3640     token infix:sym« <= »
3641         { <sym> <O(|%chaining)> }
3642
3643     token infix:sym« > »
3644         { <sym> <!before '>'> <O(|%chaining)> }
3645
3646     token infix:sym« >= »
3647         { <sym> <O(|%chaining)> }
3648
3649     token infix:sym<~~>
3650         { <sym> <O(|%chaining)> <?before \h* ('True'|'False') » <.dumbsmart($0[0].Str)>>? }
3651
3652     method dumbsmart ($litbool) {
3653         self.worry("Smartmatch against $litbool always " ~
3654             ($litbool eq 'True' ?? 'matches' !! 'fails') ~
3655             "; if you mean to test the topic for\n    truthiness, please use " ~
3656             ($litbool eq 'True' ?? ':so or *.so or ?*' !! ':!so or *.not or !*') ~
3657             ' instead');
3658         self;
3659     }
3660
3661     # XXX should move to inside meta !
3662     token infix:sym<!~>
3663         { <sym> \s <.obs('!~ to do negated pattern matching', '!~~')> <O(|%chaining)> }
3664
3665     token infix:sym<=~>
3666         { <sym> <.obs('=~ to do pattern matching', '~~')> <O(|%chaining)> }
3667
3668     token infix:sym<eq>
3669         { <sym> <O(|%chaining)> }
3670
3671     token infix:sym<ne>
3672         { <sym> <O(|%chaining)> }
3673
3674     token infix:sym<lt>
3675         { <sym> <O(|%chaining)> }
3676
3677     token infix:sym<le>
3678         { <sym> <O(|%chaining)> }
3679
3680     token infix:sym<gt>
3681         { <sym> <O(|%chaining)> }
3682
3683     token infix:sym<ge>
3684         { <sym> <O(|%chaining)> }
3685
3686     token infix:sym<=:=>
3687         { <sym> <O(|%chaining)> }
3688
3689     token infix:sym<===>
3690         { <sym> <O(|%chaining)> }
3691
3692     token infix:sym<eqv>
3693         { <sym> <O(|%chaining)> }
3694
3695     token infix:sym<before>
3696         { <sym> <O(|%chaining)> }
3697
3698     token infix:sym<after>
3699         { <sym> <O(|%chaining)> }
3700
3701
3702     ## tight and
3703     token infix:sym<&&>
3704         { <sym> <O(|%tight_and, iffy => 1)> }
3705
3706
3707     ## tight or
3708     token infix:sym<||>
3709         { <sym> <O(|%tight_or, iffy => 1)> }
3710
3711     token infix:sym<^^>
3712         { <sym> <O(|%tight_or, iffy => 1)> }
3713
3714     token infix:sym<//>
3715         { <sym> <O(|%tight_or)> }
3716
3717     token infix:sym<min>
3718         { <sym> <O(|%tight_or)> }
3719
3720     token infix:sym<max>
3721         { <sym> <O(|%tight_or)> }
3722
3723
3724     ## conditional
3725     token infix:sym<?? !!> {
3726         :my $*GOAL ::= '!!';
3727         '??'
3728         <.ws>
3729         <EXPR(item %item_assignment)>
3730         [ '!!'
3731         || <?before '::'<-[=]>> <.panic: "Please use !! rather than ::">
3732         || <infixish> {{
3733                 my $b = $<infixish>.Str;
3734                 if $b eq ':' {
3735                     $¢.panic("Please use !! rather than $b");
3736                 }
3737                 else {
3738                     $¢.panic("Precedence of $b is too loose to use between ?? and !!; please use parens around inner expression");
3739                 }
3740             }}
3741         || <?before \N*? [\n\N*?]?> '!!' <.sorry("Bogus code found before the !!")> <.panic("Confused")>
3742         || <.sorry("Found ?? but no !!")> <.panic("Confused")>
3743         ]
3744         <O(|%conditional, _reducecheck => 'raise_middle')>
3745     }
3746
3747     token infix:sym<!!> {
3748         <sym> ::
3749         [
3750         || <.suppose <infixish>> <.panic: "An infix may not start with !!">
3751         || <.panic: "Ternary !! seems to be missing its ??">
3752         ]
3753     }
3754
3755     method raise_middle {
3756         self.<middle> = self.<infix><EXPR>;
3757         self;
3758     }
3759
3760     token infix:sym<?>
3761         { <sym> {} <!before '?'> <?before <-[;]>*?':'> <.obs('?: for the conditional operator', '??!!')> <O(|%conditional)> }
3762
3763     token infix:sym<ff>
3764         { <sym> <O(|%conditional)> }
3765
3766     token infix:sym<^ff>
3767         { <sym> <O(|%conditional)> }
3768
3769     token infix:sym<ff^>
3770         { <sym> <O(|%conditional)> }
3771
3772     token infix:sym<^ff^>
3773         { <sym> <O(|%conditional)> }
3774
3775     token infix:sym<fff>
3776         { <sym> <O(|%conditional)> }
3777
3778     token infix:sym<^fff>
3779         { <sym> <O(|%conditional)> }
3780
3781     token infix:sym<fff^>
3782         { <sym> <O(|%conditional)> }
3783
3784     token infix:sym<^fff^>
3785         { <sym> <O(|%conditional)> }
3786
3787     ## assignment
3788
3789     token infix:sym<=> ()
3790     {
3791         <sym>
3792         [
3793         || <?{ $*LEFTSIGIL eq '$' }>
3794             <O(|%item_assignment)>
3795         ||  <O(|%list_assignment)>
3796         ]
3797     }
3798
3799     token infix:sym<:=>
3800         { <sym> <O(|%item_assignment)> }
3801
3802     token infix:sym<::=>
3803         { <sym> <O(|%item_assignment)> }
3804
3805     token infix:sym<.=> {
3806         <sym>
3807         <O(|%item_assignment,
3808             nextterm => 'dottyopish',
3809             _reducecheck => 'check_doteq'
3810         )>
3811     }
3812
3813     method check_doteq {
3814         # [ <?before \w+';' | 'new'|'sort'|'subst'|'trans'|'reverse'|'uniq'|'map'|'samecase'|'substr'|'flip'|'fmt'|'pick' > || ]
3815         return self if self.<left><scope_declarator>;
3816         my $ok = 0;
3817
3818         try {
3819             my $methop = self.<right><methodop>;
3820             my $name = $methop.<longname>.Str;
3821             if $name eq 'new' or $name eq 'sort' or $name eq 'subst' or $name eq 'trans' or $name eq 'reverse' or $name eq 'uniq' or $name eq 'map' or $name eq 'samecase' or $name eq 'substr' or $name eq 'flip' or $name eq 'fmt' or $name eq 'pick' {
3822                 $ok = 1;
3823             }
3824             elsif not $methop.<args>[0] {
3825                 $ok = 1;
3826             }
3827         };
3828
3829         self.cursor_force(self.<infix>.pos).worryobs('.= as append operator', '~=') unless $ok;
3830         self;
3831     }
3832
3833     token infix:sym« => »
3834         { <sym> <O(|%item_assignment, fiddly => 0)> }
3835
3836     # Note, other assignment ops generated by infix_postfix_meta_operator rule
3837
3838     ## loose unary
3839     token prefix:sym<so>
3840         { <sym> » <O(|%loose_unary)> }
3841
3842     token prefix:sym<not>
3843         { <sym> » <O(|%loose_unary)> }
3844
3845     ## list item separator
3846     token infix:sym<,> {
3847         <sym> <O(|%comma, fiddly => 0)>
3848         [ <?before \h*'...'> <.worry: "Comma found before apparent series operator; please remove comma (or put parens\n    around the ... listop, or use 'fail' instead of ...)"> ]?
3849     }
3850
3851     token infix:sym<:> {
3852         ':' <?before \s | <terminator> >
3853         {
3854             $¢.sorry("Illegal use of colon as invocant marker") unless $*INVOCANT_OK-- or $*PRECLIM ge $item_assignment_prec;
3855             $<sym> = ',';
3856             $<wascolon> = True;
3857         }
3858         <O(|%comma)>
3859     }
3860
3861     token infix:sym<X>
3862         { <sym> <O(|%list_infix)> }
3863
3864     token infix:sym<Z>
3865         { <sym> <O(|%list_infix)> }
3866
3867     token infix:sym<minmax>
3868         { <sym> <O(|%list_infix)> }
3869
3870     token infix:sym<...>
3871         { <sym> <O(|%list_infix)> }
3872
3873     token term:sym<...>
3874         { <sym> <args>? <O(|%list_prefix)> }
3875
3876     token term:sym<???>
3877         { <sym> <args>? <O(|%list_prefix)> }
3878
3879     token term:sym<!!!>
3880         { <sym> <args>? <O(|%list_prefix)> }
3881
3882     my %deftrap = (
3883         :say, :print, :abs, :alarm, :chomp, :chop, :chr, :chroot, :cos,
3884         :defined, :eval, :exp, :glob, :lc, :lcfirst, :log, :lstat, :mkdir,
3885         :ord, :readlink, :readpipe, :require, :reverse, :rmdir, :sin,
3886         :split, :sqrt, :stat, :uc, :ucfirst, :unlink,
3887         :WHAT, :WHICH, :WHERE, :HOW, :WHENCE, :VAR,
3888     );
3889
3890     # force identifier(), identifier.(), etc. to be a function call always
3891     token term:identifier
3892     {
3893         :my $name;
3894         :my $pos;
3895         :my $isname = 0;
3896         <identifier> <?before [<unsp>|'(']? > <![:]>
3897         {{
3898             $name = $<identifier>.Str;
3899             $pos = $¢.pos;
3900             $isname = $¢.is_name($name);
3901             $¢.check_nodecl($name) if $isname;
3902         }}
3903         <args($isname)>
3904         { self.add_mystery($<identifier>,$pos,substr($*ORIG,$pos,1)) unless $<args><invocant>; }
3905         {{
3906             if $*BORG and $*BORG.<block> {
3907                 if not $*BORG.<name> {
3908                     $*BORG.<culprit> = $<identifier>.cursor($pos);
3909                     $*BORG.<name> = $name;
3910                 }
3911             }
3912             if %deftrap{$name} {
3913                 my $al = $<args><arglist>[0];
3914                 my $ok = 0;
3915                 $ok = 1 if $al and $al.from != $al.to;
3916                 $ok = 1 if $<args><semiarglist>;
3917                 if not $ok {
3918                     $<identifier>.worryobs("bare '$name'", ".$name if you meant \$_, or use an explicit invocant or argument");
3919                 }
3920             }
3921         }}
3922         <O(|%term)>
3923     }
3924
3925     token args ($istype = 0) {
3926         :my $listopish = 0;
3927         :my $*GOAL ::= '';
3928         :my $*INVOCANT_OK = 1;
3929         :my $*INVOCANT_IS;
3930         [
3931     #    | :dba('argument list') '.(' ~ ')' <semiarglist>
3932         | :dba('argument list') '(' ~ ')' <semiarglist>
3933         | :dba('argument list') <.unsp> '(' ~ ')' <semiarglist>
3934         |  { $listopish = 1 } [<?before \s> <!{ $istype }> <.ws> <!infixstopper> <arglist>]?
3935         ]
3936         { $<invocant> = $*INVOCANT_IS; }
3937
3938         :dba('extra arglist after (...):')
3939         [
3940         || <?{ $listopish }>
3941         || ':' <?before \s> <moreargs=.arglist>    # either switch to listopiness
3942         || {{ $<O> = {}; }}   # or allow adverbs (XXX needs hoisting?)
3943         ]
3944     }
3945
3946     # names containing :: may or may not be function calls
3947     # bare identifier without parens also handled here if no other rule parses it
3948     token term:name
3949     {
3950         :my $name;
3951         :my $pos;
3952         <longname>
3953         {
3954             $name = $<longname>.Str;
3955             $pos = $¢.pos;
3956         }
3957         [
3958         ||  <?{
3959                 $¢.is_name($name) or substr($name,0,2) eq '::'
3960             }>
3961             { $¢.check_nodecl($name); }
3962
3963             # parametric type?
3964             :dba('type parameter')
3965             <.unsp>? [ <?before '['> <postcircumfix> ]?
3966
3967             :dba('namespace variable lookup')
3968             [
3969                 <?after '::'>
3970                 <?before [ '«' | '<' | '{' | '<<' ] > <postcircumfix>
3971                 { $*VAR = $¢.cursor_all(self.pos, $¢.pos) }
3972             ]?
3973
3974         # unrecognized names are assumed to be post-declared listops.
3975         || <args> { self.add_mystery($<longname>,$pos,'termish') unless $<args><invocant>; }
3976             {{
3977                 if $*BORG and $*BORG.<block> {
3978                     if not $*BORG.<name> {
3979                         $*BORG.<culprit> = $<longname>.cursor($pos);
3980                         $*BORG.<name> //= $name;
3981                     }
3982                 }
3983             }}
3984         ]
3985         <O(%term)>
3986     }
3987
3988     method check_nodecl($name) {
3989         if $name lt 'a' {
3990             @*MEMOS[self.pos]<nodecl> = $name;
3991         }
3992     }
3993
3994     ## loose and
3995     token infix:sym<and>
3996         { <sym> <O(|%loose_and, iffy => 1)> }
3997
3998     token infix:sym<andthen>
3999         { <sym> <O(|%loose_and)> }
4000
4001     ## loose or
4002     token infix:sym<or>
4003         { <sym> <O(|%loose_or, iffy => 1)> }
4004
4005     token infix:sym<orelse>
4006         { <sym> <O(|%loose_or)> }
4007
4008     token infix:sym<xor>
4009         { <sym> <O(|%loose_or, iffy => 1)> }
4010
4011     ## sequencer
4012     token infix:sym« <== »
4013         { <sym> <O(|%sequencer)> }
4014
4015     token infix:sym« ==> »
4016         { <sym> <O(|%sequencer)> }
4017
4018     token infix:sym« <<== »
4019         { <sym> <O(|%sequencer)> }
4020
4021     token infix:sym« ==>> »
4022         { <sym> <O(|%sequencer)> }
4023
4024     ## expression terminator
4025     # Note: must always be called as <?terminator> or <?before ...<terminator>...>
4026
4027     token terminator:sym<;>
4028         { ';' <O(|%terminator)> }
4029
4030     token terminator:sym<if>
4031         { 'if' » <.nofun> <O(|%terminator)> }
4032
4033     token terminator:sym<unless>
4034         { 'unless' » <.nofun> <O(|%terminator)> }
4035
4036     token terminator:sym<while>
4037         { 'while' » <.nofun> <O(|%terminator)> }
4038
4039     token terminator:sym<until>
4040         { 'until' » <.nofun> <O(|%terminator)> }
4041
4042     token terminator:sym<for>
4043         { 'for' » <.nofun> <O(|%terminator)> }
4044
4045     token terminator:sym<given>
4046         { 'given' » <.nofun> <O(|%terminator)> }
4047
4048     token terminator:sym<when>
4049         { 'when' » <.nofun> <O(|%terminator)> }
4050
4051     token terminator:sym« --> »
4052         { '-->' <O(|%terminator)> }
4053
4054     token terminator:sym<!!>
4055         { '!!' <?{ $*GOAL eq '!!' }> <O(|%terminator)> }
4056
4057     regex infixstopper {
4058         :dba('infix stopper')
4059         [
4060         | <?before <stopper> >
4061         | <?before '!!'> <?{ $*GOAL eq '!!' }>
4062         | <?before '{' | <lambda> > <?{ ($*GOAL eq '{' or $*GOAL eq 'endargs') and @*MEMOS[$¢.pos]<ws> }>
4063         | <?{ $*GOAL eq 'endargs' and @*MEMOS[$¢.pos]<endargs> }>
4064         ]
4065     }
4066
4067 } # end grammar
4068
4069 grammar Q is STD {
4070
4071     role b1 {
4072         token escape:sym<\\> { <sym> {} <item=.backslash> }
4073         token backslash:qq { <?before 'q'> { $<quote> = $¢.cursor_fresh(%*LANG<MAIN>).quote(); } }
4074         token backslash:sym<\\> { <text=.sym> }
4075         token backslash:stopper { <text=.stopper> }
4076         token backslash:a { <sym> }
4077         token backslash:b { <sym> }
4078         token backslash:c { <sym> <charspec> }
4079         token backslash:e { <sym> }
4080         token backslash:f { <sym> }
4081         token backslash:n { <sym> }
4082         token backslash:o { :dba('octal character') <sym> [ <octint> | '[' ~ ']' <octints> ] }
4083         token backslash:r { <sym> }
4084         token backslash:t { <sym> }
4085         token backslash:x { :dba('hex character') <sym> [ <hexint> | '[' ~ ']' <hexints> ] }
4086         token backslash:sym<0> { <sym> }
4087     } # end role
4088
4089     role b0 {
4090         token escape:sym<\\> { <!> }
4091     } # end role
4092
4093     role c1 {
4094         token escape:sym<{ }> { <?before '{'> [ :lang(%*LANG<MAIN>) <embeddedblock> ] }
4095     } # end role
4096
4097     role c0 {
4098         token escape:sym<{ }> { <!> }
4099     } # end role
4100
4101     role s1 {
4102         token escape:sym<$> {
4103             :my $*QSIGIL ::= '$';
4104             <?before '$'>
4105             [ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> ] || <.panic: "Non-variable \$ must be backslashed">
4106         }
4107     } # end role
4108
4109     role s0 {
4110         token escape:sym<$> { <!> }
4111     } # end role
4112
4113     role a1 {
4114         token escape:sym<@> {
4115             :my $*QSIGIL ::= '@';
4116             <?before '@'>
4117             [ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> | <!> ] # trap ABORTBRANCH from variable's ::
4118         }
4119     } # end role
4120
4121     role a0 {
4122         token escape:sym<@> { <!> }
4123     } # end role
4124
4125     role h1 {
4126         token escape:sym<%> {
4127             :my $*QSIGIL ::= '%';
4128             <?before '%'>
4129             [ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> | <!> ]
4130         }
4131     } # end role
4132
4133     role h0 {
4134         token escape:sym<%> { <!> }
4135     } # end role
4136
4137     role f1 {
4138         token escape:sym<&> {
4139             :my $*QSIGIL ::= '&';
4140             <?before '&'>
4141             [ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> | <!> ]
4142         }
4143     } # end role
4144
4145     role f0 {
4146         token escape:sym<&> { <!> }
4147     } # end role
4148
4149     role p1 {
4150         method postprocess ($s) { $s.parsepath }
4151     } # end role
4152
4153     role p0 {
4154         method postprocess ($s) { $s }
4155     } # end role
4156
4157     role w1 {
4158         method postprocess ($s) { $s.words }
4159     } # end role
4160
4161     role w0 {
4162         method postprocess ($s) { $s }
4163     } # end role
4164
4165     role ww1 {
4166         method postprocess ($s) { $s.words }
4167     } # end role
4168
4169     role ww0 {
4170         method postprocess ($s) { $s }
4171     } # end role
4172
4173     role x1 {
4174         method postprocess ($s) { $s.run }
4175     } # end role
4176
4177     role x0 {
4178         method postprocess ($s) { $s }
4179     } # end role
4180
4181     role q {
4182         token stopper { \' }
4183
4184         token escape:sym<\\> { <sym> <item=.backslash> }
4185
4186         token backslash:qq { <?before 'q'> { $<quote> = $¢.cursor_fresh(%*LANG<MAIN>).quote(); } }
4187         token backslash:sym<\\> { <text=.sym> }
4188         token backslash:stopper { <text=.stopper> }
4189
4190         # in single quotes, keep backslash on random character by default
4191         token backslash:misc { {} (.) { $<text> = "\\" ~ $0.Str; } }
4192
4193         # begin tweaks (DO NOT ERASE)
4194         multi method tweak (:single(:$q)!) { self.panic("Too late for :q") }
4195         multi method tweak (:double(:$qq)!) { self.panic("Too late for :qq") }
4196         multi method tweak (:cclass(:$cc)!) { self.panic("Too late for :cc") }
4197         # end tweaks (DO NOT ERASE)
4198
4199     } # end role
4200
4201     role qq does b1 does c1 does s1 does a1 does h1 does f1 {
4202         token stopper { \" }
4203         # in double quotes, omit backslash on random \W backslash by default
4204         token backslash:misc { {} [ (\W) { $<text> = $0.Str; } | $<x>=(\w) <.sorry("Unrecognized backslash sequence: '\\" ~ $<x>.Str ~ "'")> ] }
4205
4206         # begin tweaks (DO NOT ERASE)
4207         multi method tweak (:single(:$q)!) { self.panic("Too late for :q") }
4208         multi method tweak (:double(:$qq)!) { self.panic("Too late for :qq") }
4209         multi method tweak (:cclass(:$cc)!) { self.panic("Too late for :cc") }
4210         # end tweaks (DO NOT ERASE)
4211
4212     } # end role
4213
4214     role cc {
4215         token stopper { \' }
4216
4217         method ccstate ($s) {
4218             if $*CCSTATE eq '..' {
4219                 $*CCSTATE = '';
4220             }
4221             else {
4222                 $*CCSTATE = $s;
4223             }
4224             self;
4225         }
4226
4227         # (must not allow anything to match . in nibbler or we'll lose track of state)
4228         token escape:ws { \s+ [ <?before '#'> <.ws> ]? }
4229         token escape:sym<#> { '#' <.panic: "Please backslash # for literal char or put whitespace in front for comment"> }
4230
4231         token escape:sym<\\> { <sym> <item=.backslash>  <.ccstate('\\' ~ $<item>.Str)> }
4232
4233         token escape:sym<..> { <sym>
4234             [
4235             || <?{ $*CCSTATE eq '' or $*CCSTATE eq '..' }> <.sorry: "Range missing start character on the left">
4236             || <?before \s* <!stopper> <!before '..'> \S >
4237             || <.sorry: "Range missing stop character on the right">
4238             ]
4239             { $*CCSTATE = '..'; }
4240         }
4241
4242         token escape:sym<-> { '-' <?{ $*CCSTATE ne '' }> \s* <!stopper> \S <.obs('- as character range','..')> }
4243         token escape:ch { $<ch> = [\S] <.ccstate($<ch>.Str)> }
4244
4245         token backslash:stopper { <text=.stopper> }
4246         token backslash:a { :i <sym> }
4247         token backslash:b { :i <sym> }
4248         token backslash:c { :i <sym> <charspec> }
4249         token backslash:d { :i <sym> { $*CCSTATE = '' } }
4250         token backslash:e { :i <sym> }
4251         token backslash:f { :i <sym> }
4252         token backslash:h { :i <sym> { $*CCSTATE = '' } }
4253         token backslash:n { :i <sym> }
4254         token backslash:o { :i :dba('octal character') <sym> [ <octint> | '[' ~ ']' <octints> ] }
4255         token backslash:r { :i <sym> }
4256         token backslash:s { :i <sym> { $*CCSTATE = '' } }
4257         token backslash:t { :i <sym> }
4258         token backslash:v { :i <sym> { $*CCSTATE = '' } }
4259         token backslash:w { :i <sym> { $*CCSTATE = '' } }
4260         token backslash:x { :i :dba('hex character') <sym> [ <hexint> | '[' ~ ']' <hexints> ] }
4261         token backslash:sym<0> { <sym> }
4262
4263         # keep random backslashes like qq does
4264         token backslash:misc { {} [ (\W) { $<text> = $0.Str; } | $<x>=(\w) <.sorry("Unrecognized backslash sequence: '\\" ~ $<x>.Str ~ "'")> ] }
4265
4266         # begin tweaks (DO NOT ERASE)
4267         multi method tweak (:single(:$q)!) { self.panic("Too late for :q") }
4268         multi method tweak (:double(:$qq)!) { self.panic("Too late for :qq") }
4269         multi method tweak (:cclass(:$cc)!) { self.panic("Too late for :cc") }
4270         # end tweaks (DO NOT ERASE)
4271
4272     } # end role
4273
4274     role p5 {
4275         # begin tweaks (DO NOT ERASE)
4276         multi method tweak (:$g!) { self }
4277         multi method tweak (:$i!) { self }
4278         multi method tweak (:$m!) { self }
4279         multi method tweak (:$s!) { self }
4280         multi method tweak (:$x!) { self }
4281         multi method tweak (:$p!) { self }
4282         multi method tweak (:$c!) { self }
4283         # end tweaks (DO NOT ERASE)
4284     } # end role
4285
4286     # begin tweaks (DO NOT ERASE)
4287
4288     multi method tweak (:single(:$q)!) { self.truly($q,':q'); self.mixin( ::q ); }
4289
4290     multi method tweak (:double(:$qq)!) { self.truly($qq, ':qq'); self.mixin( ::qq ); }
4291     multi method tweak (:cclass(:$cc)!) { self.truly($cc, ':cc'); self.mixin( ::cc ); }
4292
4293     multi method tweak (:backslash(:$b)!)   { self.mixin($b ?? ::b1 !! ::b0) }
4294     multi method tweak (:scalar(:$s)!)      { self.mixin($s ?? ::s1 !! ::s0) }
4295     multi method tweak (:array(:$a)!)       { self.mixin($a ?? ::a1 !! ::a0) }
4296     multi method tweak (:hash(:$h)!)        { self.mixin($h ?? ::h1 !! ::h0) }
4297     multi method tweak (:function(:$f)!)    { self.mixin($f ?? ::f1 !! ::f0) }
4298     multi method tweak (:closure(:$c)!)     { self.mixin($c ?? ::c1 !! ::c0) }
4299
4300     multi method tweak (:path(:$p)!)        { self.mixin($p ?? ::p1 !! ::p0) }
4301     multi method tweak (:exec(:$x)!)        { self.mixin($x ?? ::x1 !! ::x0) }
4302     multi method tweak (:words(:$w)!)       { self.mixin($w ?? ::w1 !! ::w0) }
4303     multi method tweak (:quotewords(:$ww)!) { self.mixin($ww ?? ::ww1 !! ::ww0) }
4304
4305     multi method tweak (:heredoc(:$to)!) { self.truly($to, ':to'); self.cursor_herelang; }
4306
4307     multi method tweak (:$regex!) {
4308         return %*LANG<Regex>;
4309     }
4310
4311     multi method tweak (*%x) {
4312         my @k = keys(%x);
4313         self.sorry("Unrecognized quote modifier: " ~ join('',@k));
4314     }
4315     # end tweaks (DO NOT ERASE)
4316
4317
4318 } # end grammar
4319
4320 grammar Quasi is STD::P6 {
4321     token term:unquote {
4322         :my $*QUASIMODO = 0;
4323         <starter><starter><starter> <.ws>
4324         [ <EXPR> <stopper><stopper><stopper> || <.panic: "Confused"> ]
4325     }
4326
4327     # begin tweaks (DO NOT ERASE)
4328     multi method tweak (:$ast!) { self; } # XXX some transformer operating on the normal AST?
4329     multi method tweak (:$lang!) { self.cursor_fresh( $lang ); }
4330     multi method tweak (:$unquote!) { self; } # XXX needs to override unquote
4331     multi method tweak (:$COMPILING!) { $*QUASIMODO = 1; self; } # XXX needs to lazify the lexical lookups somehow
4332
4333     multi method tweak (*%x) {
4334         my @k = keys(%x);
4335         self.sorry("Unrecognized quasiquote modifier: " ~ join('',@k));
4336     }
4337     # end tweaks (DO NOT ERASE)
4338
4339 } # end grammar
4340
4341 ##############################
4342 # Operator Precedence Parser #
4343 ##############################
4344
4345 method EXPR ($preclvl?) {
4346     my $*CTX ::= self.callm if $*DEBUG +& DEBUG::trace_call;
4347     my $preclim = $preclvl ?? $preclvl.<prec> // $LOOSEST !! $LOOSEST;
4348     my $*LEFTSIGIL = '';        # XXX P6
4349     my $*PRECLIM = $preclim;
4350     my @termstack;
4351     my @opstack;
4352     my $termish = 'termish';
4353
4354     push @opstack, { 'O' => item %terminator, 'sym' => '' };         # (just a sentinel value)
4355
4356     my $here = self;
4357     my $S = $here.pos;
4358     self.deb("In EXPR, at $S") if $*DEBUG +& DEBUG::EXPR;
4359
4360     my &reduce := -> {
4361         self.deb("entering reduce, termstack == ", +@termstack, " opstack == ", +@opstack) if $*DEBUG +& DEBUG::EXPR;
4362         my $op = pop @opstack;
4363         my $sym = $op<sym>;
4364         given $op<O><assoc> // 'unary' {
4365             when 'chain' {
4366                 self.deb("reducing chain") if $*DEBUG +& DEBUG::EXPR;
4367                 my @chain;
4368                 push @chain, pop(@termstack);
4369                 push @chain, $op;
4370                 while @opstack {
4371                     last if $op<O><prec> ne @opstack[*-1]<O><prec>;
4372                     push @chain, pop(@termstack);
4373                     push @chain, pop(@opstack);
4374                 }
4375                 push @chain, pop(@termstack);
4376                 my $endpos = @chain[0].pos;
4377                 @chain = reverse @chain if @chain > 1;
4378                 my $startpos = @chain[0].from;
4379                 my $nop = $op.cursor_fresh();
4380                 $nop.prepbind(@chain);
4381                 $nop<chain> = [@chain];
4382                 $nop<_arity> = 'CHAIN';
4383                 $nop.from = $startpos;
4384                 $nop.pos = $endpos;
4385                 my @caps;
4386                 my $i = 0;
4387                 for @chain {
4388                     push(@caps, $i++ % 2 ?? 'op' !! 'term' );
4389                     push(@caps, $_);
4390                 }
4391                 $nop<~CAPS> = \@caps;
4392                 push @termstack, $nop._REDUCE($startpos, 'CHAIN');
4393                 @termstack[*-1].<PRE>:delete;
4394                 @termstack[*-1].<POST> :delete;
4395             }
4396             when 'list' {
4397                 self.deb("reducing list") if $*DEBUG +& DEBUG::EXPR;
4398                 my @list;
4399                 my @delims = $op;
4400                 push @list, pop(@termstack);
4401                 while @opstack {
4402                     self.deb($sym ~ " vs " ~ @opstack[*-1]<sym>) if $*DEBUG +& DEBUG::EXPR;
4403                     last if $sym ne @opstack[*-1]<sym>;
4404                     if @termstack and defined @termstack[0] {
4405                         push @list, pop(@termstack);
4406                     }
4407                     else {
4408                         self.worry("Missing term in " ~ $sym ~ " list");
4409                     }
4410                     push @delims, pop(@opstack);
4411                 }
4412                 if @termstack and defined @termstack[0] {
4413                     push @list, pop(@termstack);
4414                 }
4415                 else {
4416                     self.worry("Missing final term in '" ~ $sym ~ "' list");
4417                 }
4418                 my $endpos = @list[0].pos;
4419                 @list = reverse @list if @list > 1;
4420                 my $startpos = @list[0].from;
4421                 @delims = reverse @delims if @delims > 1;
4422                 my $nop = $op.cursor_fresh();
4423                 $nop.prepbind(@list,@delims);
4424                 $nop<sym> = $sym;
4425                 $nop<O> = $op<O>;
4426                 $nop<list> = [@list];
4427                 $nop<delims> = [@delims];
4428                 $nop<_arity> = 'LIST';
4429                 $nop.from = $startpos;
4430                 $nop.pos = $endpos;
4431                 if @list {
4432                     my @caps;
4433                     push @caps, 'elem', @list[0] if @list[0];
4434                     for 0..@delims-1 {
4435                         my $d = @delims[$_];
4436                         my $l = @list[$_+1];
4437                         push @caps, 'delim', $d;
4438                         push @caps, 'elem', $l if $l;  # nullterm?
4439                     }
4440                     $nop<~CAPS> = \@caps;
4441                 }
4442                 push @termstack, $nop._REDUCE($startpos, 'LIST');
4443                 @termstack[*-1].<PRE>:delete;
4444                 @termstack[*-1].<POST>:delete;
4445             }
4446             when 'unary' {
4447                 self.deb("reducing") if $*DEBUG +& DEBUG::EXPR;
4448                 self.deb("Termstack size: ", +@termstack) if $*DEBUG +& DEBUG::EXPR;
4449
4450                 self.deb($op.dump) if $*DEBUG +& DEBUG::EXPR;
4451                 my $arg = pop @termstack;
4452                 $op.prepbind($arg);
4453                 $op<arg> = $arg;
4454                 my $a = $op<~CAPS>;
4455                 $op<_arity> = 'UNARY';
4456                 if $arg.from < $op.from { # postfix
4457                     $op.from = $arg.from;   # extend from to include arg
4458 #                    note "OOPS ", $arg.Str, "\n" if @acaps > 1;
4459                     unshift @$a, 'arg', $arg;
4460                     push @termstack, $op._REDUCE($op.from, 'POSTFIX');
4461                     @termstack[*-1].<PRE>:delete;
4462                     @termstack[*-1].<POST>:delete;
4463                 }
4464                 elsif $arg.pos > $op.pos {   # prefix
4465                     $op.pos = $arg.pos;     # extend pos to include arg
4466 #                    note "OOPS ", $arg.Str, "\n" if @acaps > 1;
4467                     push @$a, 'arg', $arg;
4468                     push @termstack, $op._REDUCE($op.from, 'PREFIX');
4469                     @termstack[*-1].<PRE>:delete;
4470                     @termstack[*-1].<POST>:delete;
4471                 }
4472             }
4473             default {
4474                 self.deb("reducing") if $*DEBUG +& DEBUG::EXPR;
4475                 self.deb("Termstack size: ", +@termstack) if $*DEBUG +& DEBUG::EXPR;
4476
4477                 my $right = pop @termstack;
4478                 my $left = pop @termstack;
4479                 $op.prepbind($left,$right);
4480                 $op<right> = $right;
4481                 $op<left> = $left;
4482                 $op.from = $left.from;
4483                 $op.pos = $right.pos;
4484                 $op<_arity> = 'BINARY';
4485
4486                 my $a = $op<~CAPS>;
4487                 unshift @$a, 'left', $left;
4488                 push @$a, 'right', $right;
4489
4490                 self.deb($op.dump) if $*DEBUG +& DEBUG::EXPR;
4491                 my $ck;
4492                 if $ck = $op<O><_reducecheck> {
4493                     $op = $op.$ck;
4494                 }
4495                 push @termstack, $op._REDUCE($op.from, 'INFIX');
4496                 @termstack[*-1].<PRE>:delete;
4497                 @termstack[*-1].<POST>:delete;
4498             }
4499         }
4500     };
4501
4502   TERM:
4503     loop {
4504         self.deb("In loop, at ", $here.pos) if $*DEBUG +& DEBUG::EXPR;
4505         my $oldpos = $here.pos;
4506         $here = $here.cursor_fresh();
4507         $*LEFTSIGIL = @opstack[*-1]<O><prec> gt $item_assignment_prec ?? '@' !! '';     # XXX P6
4508         my @t = $here.$termish;
4509
4510         if not @t or not $here = @t[0] or ($here.pos == $oldpos and $termish eq 'termish') {
4511             $here.panic("Bogus term") if @opstack > 1;
4512             return ();
4513         }
4514         $termish = 'termish';
4515         my $PRE = $here.<PRE>:delete // [];
4516         my $POST = $here.<POST>:delete // [];
4517         my @PRE = @$PRE;
4518         my @POST = reverse @$POST;
4519
4520         # interleave prefix and postfix, pretend they're infixish
4521         my $M = $here;
4522
4523         # note that we push loose stuff onto opstack before tight stuff
4524         while @PRE and @POST {
4525             my $postO = @POST[0]<O>;
4526             my $preO = @PRE[0]<O>;
4527             if $postO<prec> lt $preO<prec> {
4528                 push @opstack, shift @POST;
4529             }
4530             elsif $postO<prec> gt $preO<prec> {
4531                 push @opstack, shift @PRE;
4532             }
4533             elsif $postO<uassoc> eq 'left' {
4534                 push @opstack, shift @POST;
4535             }
4536             elsif $postO<uassoc> eq 'right' {
4537                 push @opstack, shift @PRE;
4538             }
4539             else {
4540                 $here.sorry('"' ~ @PRE[0]<sym> ~ '" and "' ~ @POST[0]<sym> ~ '" are not associative');
4541             }
4542         }
4543         push @opstack, @PRE,@POST;
4544
4545         push @termstack, $here.<term>;
4546         @termstack[*-1].<POST>:delete;
4547         self.deb("after push: " ~ (0+@termstack)) if $*DEBUG +& DEBUG::EXPR;
4548
4549         last TERM if $preclim eq $methodcall_prec; # in interpolation, probably   # XXX P6
4550
4551         loop {     # while we see adverbs
4552             $oldpos = $here.pos;
4553             last TERM if (@*MEMOS[$oldpos]<endstmt> // 0) == 2;   # XXX P6
4554             $here = $here.cursor_fresh.ws;
4555             my @infix = $here.cursor_fresh.infixish();
4556             last TERM unless @infix;
4557             my $infix = @infix[0];
4558             last TERM unless $infix.pos > $oldpos;
4559
4560             if not $infix<sym> {
4561                 die $infix.dump if $*DEBUG +& DEBUG::EXPR;
4562             }
4563
4564             my $inO = $infix<O>;
4565             my Str $inprec = $inO<prec>;
4566             if not defined $inprec {
4567                 self.deb("No prec given in infix!") if $*DEBUG +& DEBUG::EXPR;
4568                 die $infix.dump if $*DEBUG +& DEBUG::EXPR;
4569                 $inprec = %terminator<prec>;   # XXX lexical scope is wrong
4570             }
4571
4572             if $inprec le $preclim {
4573                 if $preclim ne $LOOSEST {
4574                     my $dba = $preclvl.<dba>;
4575                     my $h = $*HIGHEXPECT;
4576                     %$h = ();
4577                     $h.{"an infix operator with precedence tighter than $dba"} = 1;
4578                 }
4579                 last TERM;
4580             }
4581
4582             $here = $infix.cursor_fresh.ws();
4583
4584             # substitute precedence for listops
4585             $inO<prec> = $inO<sub> if $inO<sub>;
4586
4587             # Does new infix (or terminator) force any reductions?
4588             while @opstack[*-1]<O><prec> gt $inprec {
4589                 &reduce();
4590             }
4591
4592             # Not much point in reducing the sentinels...
4593             last if $inprec lt $LOOSEST;
4594
4595         if $infix<fake> {
4596             push @opstack, $infix;
4597             &reduce();
4598             next;  # not really an infix, so keep trying
4599         }
4600
4601             # Equal precedence, so use associativity to decide.
4602             if @opstack[*-1]<O><prec> eq $inprec {
4603                 my $assoc = 1;
4604                 given $inO<assoc> {
4605                     when 'non'   { $assoc = 0; }
4606                     when 'left'  { &reduce() }   # reduce immediately
4607                     when 'right' { }            # just shift
4608                     when 'chain' { }            # just shift
4609                     when 'unary' { }            # just shift
4610                     when 'list'  {
4611                         $assoc = 0 unless $infix<sym> eqv @opstack[*-1]<sym>;
4612                     }
4613                     default { $here.panic('Unknown associativity "' ~ $_ ~ '" for "' ~ $infix<sym> ~ '"') }
4614                 }
4615                 if not $assoc {
4616                    $here.sorry('"' ~ @opstack[*-1]<sym> ~ '" and "' ~ $infix.Str ~ '" are non-associative and require parens');
4617                 }
4618             }
4619
4620             $termish = $inO<nextterm> if $inO<nextterm>;
4621             push @opstack, $infix;              # The Shift
4622             last;
4623         }
4624     }
4625     &reduce() while +@opstack > 1;
4626     if @termstack {
4627         +@termstack == 1 or $here.panic("Internal operator parser error, termstack == " ~ (+@termstack));
4628         @termstack[0].from = self.pos;
4629         @termstack[0].pos = $here.pos;
4630     }
4631     self._MATCHIFYr($S, "EXPR", @termstack);
4632 }
4633
4634 ##########
4635 ## Regex #
4636 ##########
4637
4638 grammar Regex is STD {
4639
4640     # begin tweaks (DO NOT ERASE)
4641     multi method tweak (:Perl5(:$P5)!) { self.require_P5; self.cursor_fresh( %*LANG<Q> ).mixin( ::q ).mixin( ::p5 ) }
4642     multi method tweak (:overlap(:$ov)!) { %*RX<ov> = $ov; self; }
4643     multi method tweak (:exhaustive(:$ex)!) { %*RX<ex> = $ex; self; }
4644     multi method tweak (:continue(:$c)!) { %*RX<c> = $c; self; }
4645     multi method tweak (:pos(:$p)!) { %*RX<p> = $p; self; }
4646     multi method tweak (:sigspace(:$s)!) { %*RX<s> = $s; self; }
4647     multi method tweak (:ratchet(:$r)!) { %*RX<r> = $r; self; }
4648     multi method tweak (:global(:$g)!) { %*RX<g> = $g; self; }
4649     multi method tweak (:ignorecase(:$i)!) { %*RX<i> = $i; self; }
4650     multi method tweak (:ignoreaccent(:$a)!) { %*RX<a> = $a; self; }
4651     multi method tweak (:samecase(:$ii)!) { %*RX<ii> = $ii; self; }
4652     multi method tweak (:sameaccent(:$aa)!) { %*RX<aa> = $aa; self; }
4653     multi method tweak (:$nth!) { %*RX<nth> = $nth; self; }
4654     multi method tweak (:st(:$nd)!) { %*RX<nth> = $nd; self; }
4655     multi method tweak (:rd(:$th)!) { %*RX<nth> = $th; self; }
4656     multi method tweak (:$x!) { %*RX<x> = $x; self; }
4657     multi method tweak (:$bytes!) { %*RX<bytes> = $bytes; self; }
4658     multi method tweak (:$codes!) { %*RX<codes> = $codes; self; }
4659     multi method tweak (:$graphs!) { %*RX<graphs> = $graphs; self; }
4660     multi method tweak (:$chars!) { %*RX<chars> = $chars; self; }
4661     multi method tweak (:$rw!) { %*RX<rw> = $rw; self; }
4662     # end tweaks (DO NOT ERASE)
4663
4664     token category:metachar { <sym> }
4665     proto token metachar {*}
4666
4667     token category:backslash { <sym> }
4668     proto token backslash {*}
4669
4670     token category:assertion { <sym> }
4671     proto token assertion {*}
4672
4673     token category:quantifier { <sym> }
4674     proto token quantifier {*}
4675
4676     token category:mod_internal { <sym> }
4677     proto token mod_internal {*}
4678
4679     proto token regex_infix {*}
4680
4681     # no such thing as ignored whitespace in a normal regex
4682     token ws { <?> }
4683
4684     token normspace {
4685         <?before \s | '#'> [ :lang(%*LANG<MAIN>) <.ws> ]
4686     }
4687
4688     token unsp { '\\' <?before \s | '#'> <.panic: "No unspace allowed in regex; if you meant to match the literal character, please enclose in single quotes ('" ~ substr($::ORIG,$¢.pos,1) ~ "') or use a backslashed form like \\x" ~ sprintf("%02x", ord(substr($::ORIG,$¢.pos,1)))> }  # no unspace in regexen
4689
4690     rule nibbler {
4691         :temp %*RX;
4692         [ <.normspace>? < || | && & > ]?
4693         <EXPR>
4694         [
4695         || <?infixstopper>
4696         || $$ <.panic: "Regex not terminated">
4697         || (\W)<.sorry("Unrecognized regex metacharacter " ~ $0.Str ~ " (must be quoted to match literally)")>
4698         || <.panic: "Regex not terminated">
4699         ]
4700     }
4701
4702     token termish {
4703         <.ws>
4704         [
4705         || <term=.quant_atom_list> <?{ %*RX<s> or $<term>.Str ~~ /\S/ }>
4706         || <normspace>
4707             [
4708             || <?before <stopper> | <[&|~]> > <.panic: "Null pattern not allowed">
4709             || <?before <[ \] \) \> ]> > {{
4710                     my $c = substr($*ORIG,$¢.pos,1);
4711                     if $*GOAL eq $c {
4712                         $¢.panic("Null pattern not allowed");
4713                     }
4714                     else {
4715                         $¢.panic("Unmatched closing $c");
4716                     }
4717                 }}
4718             || $$ <.panic: "Regex not terminated">
4719             || \W <.sorry: "Unrecognized regex metacharacter (must be quoted to match literally)">
4720             || <.panic: "Regex not terminated">
4721             ]
4722         ]
4723     }
4724     token quant_atom_list {
4725         <quantified_atom>+
4726     }
4727     token infixish {
4728         <!infixstopper>
4729         <!stdstopper>
4730         <regex_infix>
4731         {
4732             $<O> = $<regex_infix><O>;
4733             $<sym> = $<regex_infix><sym>;
4734         }
4735     }
4736     regex infixstopper {
4737         :dba('infix stopper')
4738         [
4739         | <?before <[\) \} \]]> >
4740         | <?before '>' <-[>)]> >
4741         | <?before <stopper> >
4742         ]
4743     }
4744
4745     token regex_infix:sym<||> { <sym> <O(|%tight_or)>  }
4746     token regex_infix:sym<&&> { <sym> <O(|%tight_and)>  }
4747     token regex_infix:sym<|> { <sym> <O(|%junctive_or)>  }
4748     token regex_infix:sym<&> { <sym> <O(|%junctive_and)>  }
4749
4750     token quantified_atom {
4751         <!stopper>
4752         <!regex_infix>
4753         <atom>
4754         [ <normspace>? <quantifier> ]?
4755 #            <?{ $<atom>.max_width }>
4756 #                || <.panic: "Can't quantify zero-width atom">
4757     }
4758
4759     token atom {
4760         :dba('regex atom')
4761         [
4762         | \w
4763         | <metachar> ::
4764         ]
4765     }
4766
4767     # sequence stoppers
4768     token metachar:sym« > » { '>'<!before '>'> :: <fail> }
4769     token metachar:sym<&&>  { '&&' :: <fail> }
4770     token metachar:sym<&>   { '&'  :: <fail> }
4771     token metachar:sym<||>  { '||' :: <fail> }
4772     token metachar:sym<|>   { '|'  :: <fail> }
4773     token metachar:sym<]>   { ']'  :: <fail> }
4774     token metachar:sym<)>   { ')'  :: <fail> }
4775     token metachar:sym<;>   {
4776         ';' {}
4777         [
4778         || <?before \N*? <stopper> > <.panic: "Semicolon must be quoted">
4779         || <?before .> <.panic: "Regex missing terminator (or semicolon must be quoted?)">
4780         || <.panic: "Regex missing terminator">   # the final fake ;
4781         ]
4782     }
4783
4784     token metachar:sym<{*}> { <onlystar=.sym> <?{ $*MULTINESS eq 'proto' }> }
4785     token metachar:sym<[*]> { <onlystar=.sym> <?{ $*MULTINESS eq 'proto' }> }
4786     token metachar:quant { <quantifier> <.sorry: "Quantifier quantifies nothing"> }
4787
4788     # "normal" metachars
4789
4790     token metachar:sigwhite {
4791         <normspace>
4792     }
4793     token metachar:unsp   { <unsp> }
4794
4795     token metachar:sym<{N,M}> {
4796         '{' (\d+) (','?) (\d*) '}'
4797         {
4798             my $all = substr($*ORIG, self.pos, $¢.pos - self.pos);
4799             my $repl = chars($1.Str) ??
4800                 ($0.Str ~ '..' ~ ($2.Str || '*')) !! $0.Str;
4801             $¢.sorryobs($all ~ " as general quantifier", 'X**' ~ $repl);
4802         }
4803     }
4804
4805     token metachar:sym<{ }> {
4806         <?before '{'>
4807         <embeddedblock>
4808         {{ $/<sym> := <{ }> }}
4809     }
4810
4811     token metachar:mod {
4812         <?before ':'>
4813         <mod_internal>
4814         { $/<sym> := $<mod_internal><sym> }
4815     }
4816
4817     token metachar:sym<-> {
4818         '-' <?{ $*GOAL eq ']' }> <.sorry("Invalid regex metacharacter (must be quoted to match literally)")>
4819     }
4820
4821     token metachar:sym<:> {
4822         <sym> <?before \s> <.panic: "Backtrack control ':' does not seem to have a preceding atom to control">
4823     }
4824
4825     token metachar:sym<::> {
4826         <sym>
4827     }
4828
4829     token metachar:sym<:::> {
4830         <sym>
4831     }
4832
4833     token metachar:sym<[ ]> {
4834         :dba("bracketed regex")
4835         '[' ~ ']' <nibbler>
4836         { $¢.check_old_cclass($<nibbler>.Str); }
4837         { $/<sym> := <[ ]>; }
4838     }
4839
4840     token metachar:sym<( )> {
4841         :dba("capture parens")
4842         '(' ~ ')' <nibbler>
4843         { $/<sym> := <( )> }
4844     }
4845
4846     token metachar:sym« <( » { '<(' }
4847     token metachar:sym« )> » { ')>' }
4848
4849     token metachar:sym« << » { '<<' }
4850     token metachar:sym« >> » { '>>' }
4851     token metachar:sym< « > { '«' }
4852     token metachar:sym< » > { '»' }
4853
4854     token metachar:qw {
4855         <?before '<' \s >  # (note required whitespace)
4856         <circumfix>
4857     }
4858
4859     token metachar:sym«< >» {
4860         '<' ~ '>' <assertion>
4861     }
4862
4863     token metachar:sym<\\> { <sym> <backslash> }
4864     token metachar:sym<.>  { <sym> }
4865     token metachar:sym<^^> { <sym> }
4866     token metachar:sym<^>  { <sym> }
4867     token metachar:sym<$$> {
4868         <sym>
4869         [ (\w+) <.obs("\$\$" ~ $0.Str ~ " to deref var inside a regex", "\$(\$" ~ $0.Str ~ ")")> ]?
4870     }
4871     token metachar:sym<$>  {
4872         '$'
4873         <?before
4874         | \s
4875         | '|'
4876         | '&'
4877         | ')'
4878         | ']'
4879         | '>'
4880         | $
4881         | <.stopper>
4882         >
4883     }
4884
4885     token metachar:sym<' '> { <?before "'"> [:lang(%*LANG<MAIN>) <quote>] }
4886     token metachar:sym<" "> { <?before '"'> [:lang(%*LANG<MAIN>) <quote>] }
4887
4888     token metachar:var {
4889         :my $*QSIGIL ::= substr($*ORIG,self.pos,1);
4890         <!before '$$'>
4891         <?before <sigil>>
4892         [:lang(%*LANG<MAIN>) <termish> ]
4893         $<binding> = ( \s* '=' \s* <quantified_atom> )?
4894         { $<sym> = $<termish><term>.Str; }
4895     }
4896
4897     token backslash:unspace { <?before \s> <.SUPER::ws> }
4898
4899     token backslash:sym<0> { '0' <!before <[0..7]> > }
4900
4901     token backslash:A { <sym> <.obs('\\A as beginning-of-string matcher', '^')> }
4902     token backslash:a { <sym> <.sorry: "\\a is allowed only in strings, not regexes"> }
4903     token backslash:B { <sym> <.obs('\\B as word non-boundary', '<!wb>')> }
4904     token backslash:b { <sym> <.obs('\\b as word boundary', '<?wb> (or either of « or »)')> }
4905     token backslash:c { :i <sym> <charspec> }
4906     token backslash:d { :i <sym> }
4907     token backslash:e { :i <sym> }
4908     token backslash:f { :i <sym> }
4909     token backslash:h { :i <sym> }
4910     token backslash:n { :i <sym> }
4911     token backslash:o { :i :dba('octal character') <sym> [ <octint> | '[' ~ ']' <octints> ] }
4912     token backslash:Q { <sym> <.obs('\\Q as quotemeta', 'quotes or literal variable match')> }
4913     token backslash:r { :i <sym> }
4914     token backslash:s { :i <sym> }
4915     token backslash:t { :i <sym> }
4916     token backslash:v { :i <sym> }
4917     token backslash:w { :i <sym> }
4918     token backslash:x { :i :dba('hex character') <sym> [ <hexint> | '[' ~ ']' <hexints> ] }
4919     token backslash:z { <sym> <.obs('\\z as end-of-string matcher', '$')> }
4920     token backslash:Z { <sym> <.obs('\\Z as end-of-string matcher', '\\n?$')> }
4921     token backslash:misc { $<litchar>=(\W) }
4922     token backslash:oldbackref { (<[1..9]>\d*) { my $d = $0.Str; $¢.sorryobs("the 1-based special form '\\$d' as a backreference", "the 0-based variable '\$" ~ ($d - 1) ~ "' instead" ); } }
4923     token backslash:oops { <.sorry: "Unrecognized regex backslash sequence"> . }
4924
4925     token assertion:sym<...> { <sym> }
4926     token assertion:sym<???> { <sym> }
4927     token assertion:sym<!!!> { <sym> }
4928
4929     token assertion:sym<?> { <sym> [ <?before '>'> | <assertion> ] }
4930     token assertion:sym<!> { <sym> [ <?before '>'> | <assertion> ] }
4931     token assertion:sym<*> { <sym> [ <?before '>'> | <.ws> <nibbler> ] }
4932
4933     token assertion:sym<{ }> { <embeddedblock> }
4934
4935     token assertion:variable {
4936         <?before <sigil>>  # note: semantics must be determined per-sigil
4937         [:lang($¢.cursor_fresh(%*LANG<MAIN>).unbalanced('>')) <variable=.EXPR(item %LOOSEST)>]
4938     }
4939
4940     token assertion:method {
4941         '.' [
4942             | <?before <alpha> > <assertion>
4943             | [ :lang($¢.cursor_fresh(%*LANG<MAIN>).unbalanced('>')) <dottyop> ]
4944             ]
4945     }
4946
4947     token assertion:name { [ :lang($¢.cursor_fresh(%*LANG<MAIN>).unbalanced('>')) <longname> ]
4948                                     [
4949                                     | <?before '>' >
4950                                     | <.ws> <nibbler> <.ws>
4951                                     | '=' <assertion>
4952                                     | ':' [ :lang($¢.cursor_fresh(%*LANG<MAIN>).unbalanced('>')) <.ws> <arglist> ]
4953                                     | '(' {}
4954                                         [ :lang(%*LANG<MAIN>) <arglist> ]
4955                                         [ ')' || <.panic: "Assertion call missing right parenthesis"> ]
4956                                     ]?
4957     }
4958
4959     token assertion:sym<[> { <?before '['> <cclass_elem>+ }
4960     token assertion:sym<+> { <?before '+'> <cclass_elem>+ }
4961     token assertion:sym<-> { <?before '-'> <cclass_elem>+ }
4962     token assertion:sym<.> { <sym> }
4963     token assertion:sym<,> { <sym> }
4964     token assertion:sym<~~> { <sym> [ <?before '>'> | \d+ | <desigilname> ] }
4965
4966     token assertion:bogus { <.panic: "Unrecognized regex assertion"> }
4967
4968     token sign { '+' | '-' | <?> }
4969     token cclass_elem {
4970         :my $*CCSTATE = '';
4971         :dba('character class element')
4972         <sign>
4973         <.normspace>?
4974         [
4975         | <name>
4976         | <before '['> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:cc))>
4977         ]
4978         <.normspace>?
4979     }
4980
4981     token mod_arg { :dba('modifier argument') '(' ~ ')' [:lang(%*LANG<MAIN>) <semilist> ] }
4982
4983     token mod_internal:sym<:my>    { ':' <?before ['my'|'state'|'our'|'anon'|'constant'|'temp'|'let'] \s > [:lang(%*LANG<MAIN>) <statement> <eat_terminator> ] }
4984
4985     # XXX needs some generalization
4986
4987     token mod_internal:sym<:i>    { $<sym>=[':i'|':ignorecase'] » { %*RX<i> = 1 } }
4988     token mod_internal:sym<:!i>   { $<sym>=[':!i'|':!ignorecase'] » { %*RX<i> = 0 } }
4989     token mod_internal:sym<:i( )> { $<sym>=[':i'|':ignorecase'] <mod_arg> { %*RX<i> = eval $<mod_arg>.Str } }
4990     token mod_internal:sym<:0i>   { ':' (\d+) ['i'|'ignorecase'] { %*RX<i> = $0 } }
4991
4992     token mod_internal:sym<:a>    { $<sym>=[':a'|':ignoreaccent'] » { %*RX<a> = 1 } }
4993     token mod_internal:sym<:!a>   { $<sym>=[':!a'|':!ignoreaccent'] » { %*RX<a> = 0 } }
4994     token mod_internal:sym<:a( )> { $<sym>=[':a'|':ignoreaccent'] <mod_arg> { %*RX<a> = eval $<mod_arg>.Str } }
4995     token mod_internal:sym<:0a>   { ':' (\d+) ['a'|'ignoreaccent'] { %*RX<a> = $0 } }
4996
4997     token mod_internal:sym<:s>    { ':s' 'igspace'? » { %*RX<s> = 1 } }
4998     token mod_internal:sym<:!s>   { ':!s' 'igspace'? » { %*RX<s> = 0 } }
4999     token mod_internal:sym<:s( )> { ':s' 'igspace'? <mod_arg> { %*RX<s> = eval $<mod_arg>.Str } }
5000     token mod_internal:sym<:0s>   { ':' (\d+) 's' 'igspace'? » { %*RX<s> = $0 } }
5001
5002     token mod_internal:sym<:r>    { ':r' 'atchet'? » { %*RX<r> = 1 } }
5003     token mod_internal:sym<:!r>   { ':!r' 'atchet'? » { %*RX<r> = 0 } }
5004     token mod_internal:sym<:r( )> { ':r' 'atchet'? » <mod_arg> { %*RX<r> = eval $<mod_arg>.Str } }
5005     token mod_internal:sym<:0r>   { ':' (\d+) 'r' 'atchet'? » { %*RX<r> = $0 } }
5006
5007     token mod_internal:sym<:Perl5>    { [':Perl5' | ':P5'] <.require_P5> [ :lang( $¢.cursor_fresh( %*LANG<P5Regex> ).unbalanced($*GOAL) ) <nibbler> ] }
5008
5009     token mod_internal:p6adv {
5010         <?before ':' ['dba'|'lang'] » > [ :lang(%*LANG<MAIN>) <quotepair> ] { $/<sym> := ':' ~ $<quotepair><k> }
5011     }
5012
5013     token mod_internal:oops { {} (':'\w+) <.sorry: "Unrecognized regex modifier " ~ $0.Str > }
5014
5015     token quantifier:sym<*>  { <sym> <quantmod> }
5016     token quantifier:sym<+>  { <sym> <quantmod> }
5017     token quantifier:sym<?>  { <sym> <quantmod> }
5018     token quantifier:sym<:>  { <sym> {} <?before \s> }
5019     token quantifier:sym<**> { <sym> :: <normspace>? <quantmod> <normspace>?
5020         [
5021         | \d+ \s+ '..' <.panic: "Spaces not allowed in bare range">
5022         | \d+ [ '..' [ \d+ | '*' | <.panic: "Malformed range"> ] ]?
5023         | <embeddedblock>
5024         | <quantified_atom>
5025         ]
5026     }
5027
5028     token quantifier:sym<~> {
5029         <sym> :: <normspace>? <quantified_atom> <normspace>? <quantified_atom>
5030     }
5031
5032     token quantifier:sym<~~> {
5033         [
5034         | '!' <sym>
5035         | <sym>
5036         ]
5037         <normspace> <quantified_atom> }
5038
5039     token quantmod { ':'? [ '?' | '!' | '+' ]? }
5040
5041 } # end grammar
5042
5043 method require_P5 {
5044     require STD_P5;
5045     self;
5046 }
5047
5048 method require_P6 {
5049     require STD_P6;
5050     self;
5051 }
5052
5053 #################
5054 # Symbol tables #
5055 #################
5056
5057 method newlex ($needsig = 0) {
5058     my $oid = $*CURLEX.id;
5059     $ALL.{$oid} === $*CURLEX or die "internal error: current lex id is invalid";
5060     my $line = self.lineof(self.pos);
5061     my $id;
5062     if $*NEWLEX {
5063         $*NEWLEX.<OUTER::> = $*CURLEX.idref;
5064         $*CURLEX = $*NEWLEX;
5065         $*NEWLEX = 0;
5066         $id = $*CURLEX.id;
5067     }
5068     else {
5069         $id = 'MY:file<' ~ $*FILE<name> ~ '>:line(' ~ $line ~ '):pos(' ~ self.pos ~ ')';
5070         $*CURLEX = Stash.new(
5071             'OUTER::' => [$oid],
5072             '!file' => $*FILE, '!line' => $line,
5073             '!id' => [$id],
5074         );
5075     }
5076     $*CURLEX.<!NEEDSIG> = 1 if $needsig;
5077     $*CURLEX.<!IN_DECL> = $*IN_DECL if $*IN_DECL;
5078     $ALL.{$id} = $*CURLEX;
5079     self.<LEX> = $*CURLEX;
5080     self;
5081 }
5082
5083 method finishlex {
5084     my $line = self.lineof(self.pos);
5085     $*CURLEX<$_> //= NAME.new( name => '$_', file => $*FILE, line => $line, dynamic => 1, scope => 'my' );
5086     $*CURLEX<$/> //= NAME.new( name => '$/', file => $*FILE, line => $line, dynamic => 1, scope => 'my' );
5087     $*CURLEX<$!> //= NAME.new( name => '$!', file => $*FILE, line => $line, dynamic => 1, scope => 'my' );
5088     $*SIGNUM = 0;
5089     self;
5090 }
5091
5092 method getsig {
5093     my $pv = $*CURLEX.{'%?PLACEHOLDERS'};
5094     my $sig;
5095     if $*CURLEX.<!NEEDSIG>:delete {
5096         if $pv {
5097             my $h_ = $pv.<%_>:delete;
5098             my $a_ = $pv.<@_>:delete;
5099             $sig = join ', ', sort { substr($^a,1) leg substr($^b,1) }, keys %$pv;
5100             $sig ~= ', *@_' if $a_;
5101             $sig ~= ', *%_' if $h_;
5102         }
5103         else {
5104             $sig = '$_ is ref = OUTER::<$_>';
5105         }
5106         $*CURLEX.<$?SIGNATURE> = $sig;
5107     }
5108     else {
5109         $sig = $*CURLEX.<$?SIGNATURE>;
5110     }
5111     self.<sig> = self.makestr(TEXT => $sig);
5112     self.<lex> = $*CURLEX.idref;
5113     if ($*DECLARAND<mult>//'') ne 'proto' {
5114         for keys %$*CURLEX {
5115             my $desc = $*CURLEX{$_};
5116             next unless $_ ~~ m/(\$|\@|\%|\&)\w/;
5117             next if $_ eq '$_' or $_ eq '@_' or $_ eq '%_';
5118             next if $desc<used>;
5119             next if $desc<rebind>;
5120             next if $desc<dynamic>;
5121             next if $desc<scope> eq 'state';
5122             next if $desc<stub>;
5123             my $pos = $desc<declaredat> // self.pos;
5124             self.cursor($pos).worry("$_ is declared but not used");
5125         }
5126     }
5127     self;
5128 }
5129
5130 method getdecl {
5131     self.<decl> = $*DECLARAND;
5132     self;
5133 }
5134
5135 method is_name ($n, $curlex = $*CURLEX) {
5136     my $name = $n;
5137     self.deb("is_name $name") if $*DEBUG +& DEBUG::symtab;
5138
5139     my $curpkg = $*CURPKG;
5140     return True if $name ~~ /\:\:\(/;
5141     my @components = self.canonicalize_name($name);
5142     if @components > 1 {
5143         return True if @components[0] eq 'COMPILING::';
5144         return True if @components[0] eq 'CALLER::';
5145         return True if @components[0] eq 'CONTEXT::';
5146         if $curpkg = self.find_top_pkg(@components[0]) {
5147             self.deb("Found lexical package ", @components[0]) if $*DEBUG +& DEBUG::symtab;
5148             shift @components;
5149         }
5150         else {
5151             self.deb("Looking for GLOBAL::<$name>") if $*DEBUG +& DEBUG::symtab;
5152             $curpkg = $*GLOBAL;
5153         }
5154         while @components > 1 {
5155             my $pkg = shift @components;
5156             $curpkg = $curpkg.{$pkg};
5157             return False unless $curpkg;
5158             try {
5159                 my $outlexid = $curpkg.[0];
5160                 return False unless $outlexid;
5161                 $curpkg = $ALL.{$outlexid};
5162                 return False unless $curpkg;
5163             };
5164             self.deb("Found $pkg okay") if $*DEBUG +& DEBUG::symtab;
5165         }
5166     }
5167     $name = shift(@components)//'';
5168     self.deb("Looking for $name") if $*DEBUG +& DEBUG::symtab;
5169     return True if $name eq '';
5170     my $lex = $curlex;
5171     while $lex {
5172         self.deb("Looking in ", $lex.id) if $*DEBUG +& DEBUG::symtab;
5173         if $lex.{$name} {
5174             self.deb("Found $name in ", $lex.id) if $*DEBUG +& DEBUG::symtab;
5175             $lex.{$name}<used> = 1;
5176             return True;
5177         }
5178         my $oid = $lex.<OUTER::>[0] || last;
5179         $lex = $ALL.{$oid};
5180     }
5181     return True if $curpkg.{$name};
5182     return True if $*GLOBAL.{$name};
5183     self.deb("$name not found") if $*DEBUG +& DEBUG::symtab;
5184     return False;
5185 }
5186
5187 method find_stash ($n, $curlex = $*CURLEX) {
5188     my $name = $n;
5189     self.deb("find_stash $name") if $*DEBUG +& DEBUG::symtab;
5190
5191     return () if $name ~~ /\:\:\(/;
5192     my @components = self.canonicalize_name($name);
5193     if @components > 1 {
5194         return () if @components[0] eq 'COMPILING::';
5195         return () if @components[0] eq 'CALLER::';
5196         return () if @components[0] eq 'CONTEXT::';
5197         if $curlex = self.find_top_pkg(@components[0]) {
5198             self.deb("Found lexical package ", @components[0]) if $*DEBUG +& DEBUG::symtab;
5199             shift @components;
5200         }
5201         else {
5202             self.deb("Looking for GLOBAL::<$name>") if $*DEBUG +& DEBUG::symtab;
5203             $curlex = $*GLOBAL;
5204         }
5205         while @components > 1 {
5206             my $lex = shift @components;
5207             $curlex = $curlex.{$lex};
5208             return () unless $curlex;
5209             try {
5210                 my $outlexid = $curlex.[0];
5211                 return False unless $outlexid;
5212                 $curlex = $ALL.{$outlexid};
5213                 return () unless $curlex;
5214             };
5215             self.deb("Found $lex okay") if $*DEBUG +& DEBUG::symtab;
5216         }
5217     }
5218     $name = shift(@components)//'';
5219     return $curlex if $name eq '';
5220
5221     my $lex = $curlex;
5222     while $lex {
5223         return $_ if $_ = $lex.{$name};
5224         my $oid = $lex.<OUTER::>[0] || last;
5225         $lex = $ALL.{$oid};
5226     }
5227     return $_ if $_ = $curlex.{$name};
5228     return $_ if $_ = $*GLOBAL.{$name};
5229     return ();
5230 }
5231
5232 method find_top_pkg ($name) {
5233     self.deb("find_top_pkg $name") if $*DEBUG +& DEBUG::symtab;
5234     $name ~= '::' unless $name ~~ /\:\:$/;
5235     if $name eq 'OUR::' {
5236         return $*CURPKG;
5237     }
5238     elsif $name eq 'MY::' {
5239         return $*CURLEX;
5240     }
5241     elsif $name eq 'OUTER::' {
5242         return $ALL.{$*CURLEX.<OUTER::>[0]};
5243     }
5244     elsif $name eq 'CORE::' {
5245         return $*CORE;
5246     }
5247     elsif $name eq 'SETTING::' {
5248         return $*SETTING;
5249     }
5250     elsif $name eq 'UNIT::' {
5251         return $*UNIT;
5252     }
5253     # everything is somewhere in lexical scope (we hope)
5254     my $lex = $*CURLEX;
5255     while $lex {
5256         return $lex.{$name} if $lex.{$name};
5257         my $oid = $lex.<OUTER::>[0] || last;
5258         $lex = $ALL.{$oid};
5259     }
5260     return 0;
5261 }
5262
5263 method add_name ($name) {
5264     my $scope = $*SCOPE || 'my';
5265     my $pkgdecl = $*PKGDECL || 'symbol';
5266     return self if $scope eq 'anon' or $pkgdecl eq 'slang';
5267     self.deb("Adding $scope $name") if $*DEBUG +& DEBUG::symtab;
5268     if $scope eq 'augment' or $scope eq 'supersede' {
5269         self.is_name($name) or
5270             self.worry("Can't $scope $pkgdecl $name because it doesn't exist");
5271         $*MONKEY_TYPING or
5272             self.sorry("Can't $scope $pkgdecl $name without MONKEY_TYPING");
5273     }
5274     else {
5275         if $scope eq 'our' {
5276             self.add_our_name($name);
5277         }
5278         else {
5279             self.add_my_name($name);
5280         }
5281     }
5282     self;
5283 }
5284
5285 method add_my_name ($n, $d = Nil, $p = Nil) {   # XXX gimme doesn't handle optionals right
5286     my $name = $n;
5287     self.deb("add_my_name $name in ", $*CURLEX.id) if $*DEBUG +& DEBUG::symtab;
5288     return self if $name ~~ /\:\:\(/;
5289     my $curstash = $*CURLEX;
5290     my @components = self.canonicalize_name($name);
5291     my $sid = $curstash.id // '???';
5292     while @components > 1 {
5293         my $pkg = shift @components;
5294         $sid ~= "::$pkg";
5295         my $newstash = $curstash.{$pkg} //= Stash.new(
5296             'PARENT::' => $curstash.idref,
5297             '!stub' => 1,
5298             '!id' => [$sid] );
5299         self.deb("Adding new package $pkg in ", $curstash.id) if $*DEBUG +& DEBUG::symtab;
5300         $curstash = $newstash;
5301     }
5302     $name = my $shortname = shift @components;
5303     return self unless defined $name and $name ne '';
5304     return self if $name eq '$' or $name eq '@' or $name eq '%';
5305     return self.add_categorical(substr($name,1)) if $name ~~ /^\&\w+\:/;
5306     if $shortname ~~ /\:/ {
5307         $shortname ~~ s/\:.*//;
5308     }
5309
5310     # This may just be a lexical alias to "our" and such,
5311     # so reuse $*DECLARAND pointer if it's there.
5312     my $declaring = $d // NAME.new(
5313         xlex => $curstash.idref,
5314         name => $name,
5315         file => $*FILE, line => self.line,
5316         mult => ($*MULTINESS||'only'),
5317         of   => $*OFTYPE,
5318         scope => $*SCOPE,
5319     );
5320     my $old = $curstash.{$name};
5321     if $old and $old<line> and not $old<stub> {
5322         self.deb("$name exists, curstash = ", $curstash.id) if $*DEBUG +& DEBUG::symtab;
5323         my $omult = $old<mult> // '';
5324         if $declaring === $old {}  # already did this, probably enum
5325         elsif $*SCOPE eq 'use' {}
5326         elsif $*MULTINESS eq 'multi' and $omult ne 'only' {}
5327         elsif $omult eq 'proto' and $*MULTINESS ne 'proto' and $*MULTINESS ne 'only' {}
5328         elsif $*PKGDECL eq 'role' {}
5329         elsif $*SIGNUM and $old<signum> and $*SIGNUM != $old<signum> {
5330             $old<signum> = $*SIGNUM;
5331         }
5332         else {
5333             my $ofile = $old.file // 0;
5334             my $oline = $old.line // '???';
5335             my $loc = '';
5336             if $ofile {
5337                 if $ofile !=== $*FILE {
5338                     my $oname = $ofile<name>;
5339                     $loc = " (see $oname line $oline)";
5340                 }
5341                 else {
5342                     $loc = " (see line $oline)";
5343                 }
5344             }
5345             if $old.olex {
5346                 my $rebind = $old<rebind>;
5347                 my $truename = $old<varbind><truename>;
5348                 self.sorry("Lexical symbol '$name' is already bound to an outer symbol$loc;\n  the implicit outer binding at line $rebind must be rewritten as $truename\n  before you can unambiguously declare a new '$name' in this scope");
5349             }
5350             elsif $name ~~ /^\w/ {
5351                 self.sorry("Illegal redeclaration of symbol '$name'$loc");
5352             }
5353             elsif $name ~~ s/^\&// {
5354                 self.sorry("Illegal redeclaration of routine '$name'$loc") unless $name eq '';
5355             }
5356             else {  # XXX eventually check for conformant arrays here
5357                 self.worry("Useless redeclaration of variable $name$loc");
5358             }
5359             return self;
5360         }
5361     }
5362     else {
5363         $*DECLARAND = $curstash.{$name} = $declaring;
5364         $curstash.{$shortname} = $declaring unless $shortname eq $name;
5365         $*DECLARAND<declaredat> = self.pos;
5366         $*DECLARAND<inlex> = $curstash.idref;
5367         $*DECLARAND<signum> = $*SIGNUM if $*SIGNUM;
5368         $*DECLARAND<const> ||= 1 if $*IN_DECL eq 'constant';
5369         $*DECLARAND<used> = 1 if substr($name,0,1) eq '&' and %::MYSTERY{substr($name,1)};
5370         if !$*DECLARAND<const> and $shortname ~~ /^\w+$/ {
5371             $curstash.{"&$shortname"} //= $curstash.{$shortname};
5372             $curstash.{"&$shortname"}<used> = 1;
5373             $sid ~= "::$name";
5374             if $name !~~ /\:\</ {
5375                 $*NEWPKG = $curstash.{$name ~ '::'} = ($p // Stash.new(
5376                     'PARENT::' => $curstash.idref,
5377                     '!file' => $*FILE, '!line' => self.line,
5378                     '!id' => [$sid] ));
5379             }
5380         }
5381     }
5382     self;
5383 }
5384
5385 method add_our_name ($n) {
5386     my $name = $n;
5387     self.deb("add_our_name $name in " ~ $*CURPKG.id) if $*DEBUG +& DEBUG::symtab;
5388     return self if $name ~~ /\:\:\(/;
5389     my $curstash = $*CURPKG;
5390     self.deb("curstash $curstash global $*GLOBAL ", join ' ', %$*GLOBAL) if $*DEBUG +& DEBUG::symtab;
5391     $name ~~ s/\:ver\<.*?\>//;
5392     $name ~~ s/\:auth\<.*?\>//;
5393     my @components = self.canonicalize_name($name);
5394     if @components > 1 {
5395         my $c = self.find_top_pkg(@components[0]);
5396         if $c {
5397             shift @components;
5398             $curstash = $c;
5399         }
5400     }
5401     my $sid = $curstash.id // '???';
5402     while @components > 1 {
5403         my $pkg = shift @components;
5404         $sid ~= "::$pkg";
5405         my $newstash = $curstash.{$pkg} //= Stash.new(
5406             'PARENT::' => $curstash.idref,
5407             '!stub' => 1,
5408             '!id' => [$sid] );
5409         $curstash = $newstash;
5410         self.deb("Adding new package $pkg in $curstash ") if $*DEBUG +& DEBUG::symtab;
5411     }
5412     $name = my $shortname = shift @components;
5413     return self unless defined $name and $name ne '';
5414     if $shortname ~~ /\:/ {
5415         $shortname ~~ s/\:.*//;
5416     }
5417
5418     my $declaring = $*DECLARAND // NAME.new(
5419         xlex => $curstash.idref,
5420         name => $name,
5421         file => $*FILE, line => self.line,
5422         mult => ($*MULTINESS||'only'),
5423         of   => $*OFTYPE,
5424         scope => $*SCOPE,
5425     );
5426     my $old = $curstash.{$name};
5427     if $old and $old<line> and not $old<stub> {
5428         my $omult = $old<mult> // '';
5429         if $declaring === $old {} # already did it somehow
5430         elsif $*SCOPE eq 'use' {}
5431         elsif $*MULTINESS eq 'multi' and $omult ne 'only' {}
5432         elsif $omult eq 'proto' and $*MULTINESS ne 'proto' and $*MULTINESS ne 'only' {}
5433         elsif $*PKGDECL eq 'role' {}
5434         else {
5435             my $ofile = $old.file // 0;
5436             my $oline = $old.line // '???';
5437             my $loc = '';
5438             if $ofile {
5439                 if $ofile !=== $*FILE {
5440                     my $oname = $ofile<name>;
5441                     $loc = " (from $oname line $oline)";
5442                 }
5443                 else {
5444                     $loc = " (from line $oline)";
5445                 }
5446             }
5447             $sid = self.clean_id($sid, $name);
5448             if $name ~~ /^\w/ {
5449                 self.sorry("Illegal redeclaration of symbol '$sid'$loc");
5450             }
5451             elsif $name ~~ s/^\&// {
5452                 self.sorry("Illegal redeclaration of routine '$sid'$loc") unless $name eq '';
5453             }
5454             else {  # XXX eventually check for conformant arrays here
5455                 # (redeclaration of identical package vars is not useless)
5456             }
5457             return self;
5458         }
5459     }
5460     else {
5461         $*DECLARAND = $curstash.{$name} = $declaring;
5462         $curstash.{$shortname} //= $declaring unless $shortname eq $name;
5463         $*DECLARAND<inpkg> = $curstash.idref;
5464         if $shortname ~~ /^\w+$/ and $*IN_DECL ne 'constant' {
5465             $curstash.{"&$shortname"} //= $declaring;
5466             $curstash.{"&$shortname"}<used> = 1;
5467             $sid ~= "::$name";
5468             $*NEWPKG = $curstash.{$name ~ '::'} //= Stash.new(
5469                 'PARENT::' => $curstash.idref,
5470                 '!file' => $*FILE, '!line' => self.line,
5471                 '!id' => [$sid] );
5472         }
5473     }
5474     self.add_my_name($n, $declaring, $curstash.{$name ~ '::'}) if $curstash === $*CURPKG;   # the lexical alias
5475     self;
5476 }
5477
5478 method add_mystery ($token,$pos,$ctx) {
5479     my $name = $token.Str;
5480     return self if $*IN_PANIC;
5481     if self.is_known('&' ~ $name) or self.is_known($name) {
5482         self.deb("$name is known") if $*DEBUG +& DEBUG::symtab;
5483     }
5484     else {
5485         self.deb("add_mystery $name $*CURLEX") if $*DEBUG +& DEBUG::symtab;
5486         %*MYSTERY{$name}.<lex> = $*CURLEX;
5487         %*MYSTERY{$name}.<token> = $token;
5488         %*MYSTERY{$name}.<ctx> = $ctx;
5489         %*MYSTERY{$name}.<line> ~= ',' if %*MYSTERY{$name}.<line>;
5490         %*MYSTERY{$name}.<line> ~= self.lineof($pos);
5491     }
5492     self;
5493 }
5494
5495 method explain_mystery() {
5496     my %post_types;
5497     my %unk_types;
5498     my %unk_routines;
5499     my $m = '';
5500     for keys(%*MYSTERY) {
5501         my $p = %*MYSTERY{$_}.<lex>;
5502         if self.is_name($_, $p) {
5503             # types may not be post-declared
5504             %post_types{$_} = %*MYSTERY{$_};
5505             next;
5506         }
5507
5508         next if self.is_known($_, $p) or self.is_known('&' ~ $_, $p);
5509
5510         # just a guess, but good enough to improve error reporting
5511         if $_ lt 'a' {
5512             %unk_types{$_} = %*MYSTERY{$_};
5513         }
5514         else {
5515             %unk_routines{$_} = %*MYSTERY{$_};
5516         }
5517     }
5518     if %post_types {
5519         my @tmp = sort keys(%post_types);
5520         $m ~= "Illegally post-declared type" ~ ('s' x (@tmp != 1)) ~ ":\n";
5521         for @tmp {
5522             $m ~= "\t'$_' used at line " ~ %post_types{$_}.<line> ~ "\n";
5523         }
5524     }
5525     if %unk_types {
5526         my @tmp = sort keys(%unk_types);
5527         $m ~= "Undeclared name" ~ ('s' x (@tmp != 1)) ~ ":\n";
5528         for @tmp {
5529             $m ~= "\t'$_' used at line " ~ %unk_types{$_}.<line> ~ "\n";
5530         }
5531     }
5532     if %unk_routines {
5533         my @tmp = sort keys(%unk_routines);
5534         $m ~= "Undeclared routine" ~ ('s' x (@tmp != 1)) ~ ":\n";
5535         for @tmp {
5536             $m ~= "\t'$_' used at line " ~ %unk_routines{$_}.<line> ~ "\n";
5537         }
5538     }
5539     self.sorry($m) if $m;
5540     self;
5541 }
5542
5543 method load_setting ($setting) {
5544     $ALL = self.load_lex($setting);
5545
5546     $*CORE = $ALL<CORE>;
5547     $*CORE.<!id> //= ['CORE'];
5548
5549     $*SETTING = $ALL<SETTING>;
5550     $*CURLEX = $*SETTING;
5551
5552     $*GLOBAL = $*CORE.<GLOBAL::> = Stash.new(
5553         '!file' => $*FILE, '!line' => 1,
5554         '!id' => ['GLOBAL'],
5555     );
5556     $*CURPKG = $*GLOBAL;
5557 }
5558
5559 method is_known ($n, $curlex = $*CURLEX) {
5560     my $name = $n;
5561     self.deb("is_known $name") if $*DEBUG +& DEBUG::symtab;
5562     return True if $*QUASIMODO;
5563     return True if $*CURPKG.{$name};
5564     return False if $name ~~ /\:\:\(/;
5565     my $curpkg = $*CURPKG;
5566     my @components = self.canonicalize_name($name);
5567     if @components > 1 {
5568         return True if @components[0] eq 'COMPILING::';
5569         return True if @components[0] eq 'CALLER::';
5570         return True if @components[0] eq 'CONTEXT::';
5571         if $curpkg = self.find_top_pkg(@components[0]) {
5572             self.deb("Found lexical package ", @components[0]) if $*DEBUG +& DEBUG::symtab;
5573             shift @components;
5574         }
5575         else {
5576             self.deb("Looking for GLOBAL::<$name>") if $*DEBUG +& DEBUG::symtab;
5577             $curpkg = $*GLOBAL;
5578         }
5579         while @components > 1 {
5580             my $pkg = shift @components;
5581             self.deb("Looking for $pkg in $curpkg ", join ' ', keys(%$curpkg)) if $*DEBUG +& DEBUG::symtab;
5582             $curpkg = $curpkg.{$pkg};
5583             return False unless $curpkg;
5584             try {
5585                 my $outlexid = $curpkg.[0];
5586                 return False unless $outlexid;
5587                 $curpkg = $ALL.{$outlexid};
5588                 return False unless $curpkg;
5589             };
5590             self.deb("Found $pkg okay, now in $curpkg ") if $*DEBUG +& DEBUG::symtab;
5591         }
5592     }
5593
5594     $name = shift(@components)//'';
5595     self.deb("Final component is $name") if $*DEBUG +& DEBUG::symtab;
5596     return True if $name eq '';
5597     if $curpkg.{$name} {
5598         self.deb("Found") if $*DEBUG +& DEBUG::symtab;
5599         $curpkg.{$name}<used>++;
5600         return True;
5601     }
5602     # leading components take us non-lexical?  assume we can't know
5603     return False if $curpkg !=== $*CURPKG and $curpkg<!id>[0] ~~ /^GLOBAL($|\:\:)/;
5604
5605     my $varbind = { truename => '???' };
5606     return True if $n !~~ /\:\:/ and self.lex_can_find_name($curlex,$name,$varbind);
5607     self.deb("Not Found") if $*DEBUG +& DEBUG::symtab;
5608
5609     return False;
5610 }
5611
5612 method lex_can_find_name ($lex, $name, $varbind) {
5613     self.deb("Looking in ", $lex.id) if $*DEBUG +& DEBUG::symtab;
5614     if $lex.{$name} {
5615         self.deb("Found $name in ", $lex.id) if $*DEBUG +& DEBUG::symtab;
5616         $lex.{$name}<used>++;
5617         return True;
5618     }
5619
5620     my $outlexid = $lex.<OUTER::>[0];
5621     return False unless $outlexid;
5622     my $outlex = $ALL.{$outlexid};
5623
5624     if self.lex_can_find_name($outlex,$name,$varbind) {
5625         # fake up an alias to outer symbol to catch reclaration
5626         my $outname = $outlex.{$name}<name>;
5627         my $outfile = $outlex.{$name}<file>;
5628         my $outline = $outlex.{$name}<line>;
5629         $outname = '<' ~ $outname ~ '>' unless $outname ~~ /\:\:\</;
5630         $outname = "OUTER::" ~ $outname;
5631         $lex.{$name} = NAME.new(
5632             xlex => $lex.idref,
5633             olex => $lex.idref,
5634             name => $outname,
5635             file => $outfile, line => $outline,
5636             rebind => self.line,
5637             varbind => $varbind,
5638             mult => 'only',
5639             scope => $lex.{$name}<scope>,
5640         );
5641         # the innermost lex sets this last to get correct # of OUTER::s
5642         $varbind.<truename> = $outname;
5643         return True;
5644     }
5645
5646     return False;
5647 }
5648
5649 method add_routine ($name) {
5650     @*MEMOS[self.pos]<wasname> = $name if self.is_name($name);
5651     my $vname = '&' ~ $name;
5652     self.add_name($vname);
5653     self;
5654 }
5655
5656 method add_variable ($name) {
5657     my $scope = $*SCOPE || 'our';
5658     return self if $scope eq 'anon';
5659     if $scope eq 'our' {
5660         self.add_our_name($name);
5661     }
5662     else {
5663         self.add_my_name($name);
5664     }
5665     self;
5666 }
5667
5668 method add_constant($name,$value) {
5669     my $*IN_DECL = 'constant';
5670     self.deb("add_constant $name = $value in", $*CURLEX.id) if $*DEBUG +& DEBUG::symtab;
5671     my $*DECLARAND;
5672     self.add_my_name($name);
5673     $*DECLARAND<value> = $value;
5674     self;
5675 }
5676
5677 method add_placeholder($name) {
5678     my $decl = $*CURLEX.<!IN_DECL> // '';
5679     $decl = ' ' ~ $decl if $decl;
5680     my $*IN_DECL = 'variable';
5681
5682     if $*SIGNUM {
5683         return self.sorry("Placeholder variable $name is not allowed in the$decl signature");
5684     }
5685     elsif my $siggy = $*CURLEX.<$?SIGNATURE> {
5686         return self.sorry("Placeholder variable $name cannot override existing signature $siggy");
5687     }
5688     if not $*CURLEX.<!NEEDSIG> {
5689         if $*CURLEX === $*UNIT {
5690             return self.sorry("Placeholder variable $name may not be used outside of a block");
5691         }
5692         return self.sorry("Placeholder variable $name may not be used here because the surrounding$decl block takes no signature");
5693     }
5694     if $name ~~ /\:\:/ {
5695         return self.sorry("Placeholder variable $name may not be package qualified");
5696     }
5697
5698     my $varname = $name;
5699     my $twigil;
5700     my $signame;
5701     $twigil = '^' if $varname ~~ s/\^//;
5702     $signame = $twigil = ':' if $varname ~~ s/\://;
5703     $signame ~= $varname;
5704     return self if $*CURLEX.{'%?PLACEHOLDERS'}{$signame}++;
5705
5706     if $*CURLEX{$varname} {
5707         return self.sorry("$varname has already been used as a non-placeholder in the surrounding$decl block,\n  so you will confuse the reader if you suddenly declare $name here");
5708     }
5709
5710     self.add_my_name($varname);
5711     $*CURLEX{$varname}<used> = 1;
5712     self;
5713 }
5714
5715 method check_variable ($variable) {
5716     my $name = $variable.Str;
5717     self.deb("check_variable $name") if $*DEBUG +& DEBUG::symtab;
5718     my ($sigil, $twigil, $first) = $name ~~ /(\$|\@|\%|\&)(\W*)(.?)/;
5719     given $twigil {
5720         when '' {
5721             my $ok = 0;
5722             $ok ||= $*IN_DECL;
5723             $ok ||= $sigil eq '&';
5724             $ok ||= $first lt 'A';
5725             $ok ||= self.is_known($name);
5726             $ok ||= $name ~~ /.\:\:/ && $name !~~ /MY|UNIT|OUTER|SETTING|CORE/;
5727             if not $ok {
5728                 my $id = $name;
5729                 $id ~~ s/^\W\W?//;
5730                 if $name eq '@_' or $name eq '%_' {
5731                     $variable.add_placeholder($name);
5732                 }
5733                 else {  # guaranteed fail now
5734                     if my $scope = @*MEMOS[$variable.from]<declend> {
5735                         return $variable.sorry("Variable $name is not predeclared (declarators are tighter than comma, so maybe your '$scope' signature needs parens?)");
5736                     }
5737                     elsif $id !~~ /\:\:/ {
5738                         if self.is_known('@' ~ $id) {
5739                             return $variable.sorry("Variable $name is not predeclared (did you mean \@$id?)");
5740                         }
5741                         elsif self.is_known('%' ~ $id) {
5742                             return $variable.sorry("Variable $name is not predeclared (did you mean \%$id?)");
5743                         }
5744                     }
5745                     return $variable.sorry("Variable $name is not predeclared");
5746                 }
5747             }
5748             elsif $*CURLEX{$name} {
5749                 $*CURLEX{$name}<used>++;
5750             }
5751         }
5752         when '^' {
5753             my $*MULTINESS = 'multi';
5754             $variable.add_placeholder($name);
5755         }
5756         when ':' {
5757             my $*MULTINESS = 'multi';
5758             $variable.add_placeholder($name);
5759         }
5760         when '~' {
5761             return %*LANG.{substr($name,2)};
5762         }
5763         when '?' {
5764             if $name ~~ /\:\:/ {
5765                 my ($first) = self.canonicalize_name($name);
5766                 $variable.worry("Unrecognized variable: $name") unless $first ~~ /^(CALLER|CONTEXT|OUTER|MY|SETTING|CORE)\:\:$/;
5767             }
5768             else {
5769                 # search upward through languages to STD
5770                 my $v = $variable.lookup_compiler_var($name);
5771                 $variable.<value> = $v if $v;
5772             }
5773         }
5774     }
5775     self;
5776 }
5777
5778 method lookup_compiler_var($name, $default = Nil) {
5779
5780     # see if they did "constant $?FOO = something" earlier
5781     my $lex = $*CURLEX.{$name};
5782     if defined $lex {
5783         if $lex.<thunk>:exists {
5784             return $lex.<thunk>.();
5785         }
5786         else {
5787             return $lex.<value>;
5788         }
5789     }
5790
5791     given $name {
5792         when '$?FILE'     { return $*FILE<name>; }
5793         when '$?LINE'     { return self.lineof(self.pos); }
5794         when '$?POSITION' { return self.pos; }
5795
5796         when '$?LANG'     { return item %*LANG; }
5797
5798         when '$?LEXINFO'   { return $*CURLEX; }
5799
5800         when '$?PACKAGE'  { return $*CURPKG; }
5801         when '$?MODULE'   { return $*CURPKG; } #  XXX should scan
5802         when '$?CLASS'    { return $*CURPKG; } #  XXX should scan
5803         when '$?ROLE'     { return $*CURPKG; } #  XXX should scan
5804         when '$?GRAMMAR'  { return $*CURPKG; } #  XXX should scan
5805
5806         when '$?PACKAGENAME' { return $*CURPKG.id }
5807
5808         when '$?OS'       { return 'unimpl'; }
5809         when '$?DISTRO'   { return 'unimpl'; }
5810         when '$?VM'       { return 'unimpl'; }
5811         when '$?XVM'      { return 'unimpl'; }
5812         when '$?PERL'     { return 'unimpl'; }
5813
5814         when '$?USAGE'    { return 'unimpl'; }
5815
5816         when '&?ROUTINE'  { return 'unimpl'; }
5817         when '&?BLOCK'    { return 'unimpl'; }
5818
5819         when '%?CONFIG'    { return 'unimpl'; }
5820         when '%?DEEPMAGIC' { return 'unimpl'; }
5821
5822         my $dynvar = self.lookup_dynvar($name);
5823         return $dynvar if defined $dynvar;
5824
5825         return $default if defined $default;
5826         # (derived grammars should default to nextsame, terminating here)
5827         default { self.worry("Unrecognized variable: $name"); return 0; }
5828     }
5829 }
5830
5831 ####################
5832 # Service Routines #
5833 ####################
5834
5835 method panic (Str $s) {
5836     die "Recursive panic" if $*IN_PANIC;
5837     $*IN_PANIC++;
5838     self.deb("panic $s") if $*DEBUG;
5839     my $m;
5840     my $here = self;
5841
5842     # Have we backed off recently?
5843     my $highvalid = self.pos <= $*HIGHWATER;
5844
5845     $here = self.cursor($*HIGHWATER) if $highvalid;
5846
5847     my $first = $here.lineof($*LAST_NIBBLE.from);
5848     my $last = $here.lineof($*LAST_NIBBLE.pos);
5849     if $first != $last {
5850         if $here.lineof($here.pos) == $last {
5851             $m ~= "(Possible runaway string from line $first)\n";
5852         }
5853         else {
5854             $first = $here.lineof($*LAST_NIBBLE_MULTILINE.from);
5855             $last = $here.lineof($*LAST_NIBBLE_MULTILINE.pos);
5856             # the bigger the string (in lines), the further back we suspect it
5857             if $here.lineof($here.pos) - $last < $last - $first  {
5858                 $m ~= "(Possible runaway string from line $first to line $last)\n";
5859             }
5860         }
5861     }
5862
5863     $m ~= $s;
5864
5865     if $highvalid {
5866         $m ~= $*HIGHMESS if $*HIGHMESS;
5867         $*HIGHMESS = $m;
5868     }
5869     else {
5870         # not in backoff, so at "bleeding edge", as it were... therefore probably
5871         # the exception will be caught and re-panicked later, so remember message
5872         $*HIGHMESS ~= $s ~ "\n";
5873     }
5874
5875     $m ~= $here.locmess;
5876     $m ~= "\n" unless $m ~~ /\n$/;
5877
5878     if $highvalid and %$*HIGHEXPECT {
5879         my @keys = sort keys %$*HIGHEXPECT;
5880         if @keys > 1 {
5881             $m ~= "    expecting any of:\n\t" ~ join("\n\t", sort keys %$*HIGHEXPECT) ~ "\n";
5882         }
5883         else {
5884             $m ~= "    expecting @keys\n" unless @keys[0] eq 'whitespace';
5885         }
5886     }
5887     if $m ~~ /infix|nofun/ and not $m ~~ /regex/ and not $m ~~ /infix_circumfix/ {
5888         my @t = $here.suppose( sub { $here.term } );
5889         if @t {
5890             my $endpos = $here.pos;
5891             my $startpos = @*MEMOS[$endpos]<ws> // $endpos;
5892
5893             if self.lineof($startpos) != self.lineof($endpos) {
5894                 $m ~~ s|Confused|Two terms in a row (previous line missing its semicolon?)|;
5895             }
5896             elsif @*MEMOS[$here.pos - 1]<baremeth> {
5897                 $m ~~ s|Confused|Two terms in a row (method call requires colon or parens to take arguments)|;
5898             }
5899             elsif @*MEMOS[$here.pos - 1]<arraycomp> {
5900                 $m ~~ s|Confused|Two terms in a row (preceding is not a valid reduce operator)|;
5901             }
5902             else {
5903                 $m ~~ s|Confused|Two terms in a row|;
5904             }
5905         }
5906         elsif my $type = @*MEMOS[$here.pos - 1]<nodecl> {
5907             my @t = $here.suppose( sub { $here.variable } );
5908             if @t {
5909                 my $variable = @t[0].Str;
5910                 $m ~~ s|Confused|Bare type $type cannot declare $variable without a preceding scope declarator such as 'my'|;
5911             }
5912         }
5913     }
5914     elsif my $type = @*MEMOS[$here.pos - 1]<wasname> {
5915         my @t = $here.suppose( sub { $here.identifier } );
5916         my $name = @t[0].Str;
5917         my $s = $*SCOPE ?? "'$*SCOPE'" !! '(missing) scope declarator';
5918         my $d = $*IN_DECL;
5919         $d = "$*MULTINESS $d" if $*MULTINESS and $*MULTINESS ne $d;
5920         $m ~~ s|Malformed block|Return type $type is not allowed between '$d' and '$name'; please put it:\n  after the $s but before the '$d',\n  within the signature following the '-->' marker, or\n  as the argument of a 'returns' trait after the signature.|;
5921     }
5922
5923     if @*WORRIES {
5924         $m ~= "Other potential difficulties:\n  " ~ join( "\n  ", @*WORRIES) ~ "\n";
5925     }
5926
5927     $*IN_PANIC--;
5928     die $m if $*IN_SUPPOSE;     # just throw the exception back to the supposer
5929     $*IN_PANIC++;
5930
5931     note $Cursor::RED, '===', $Cursor::CLEAR, 'SORRY!', $Cursor::RED, '===', $Cursor::CLEAR, "\n"
5932         unless $*FATALS++;
5933     note $m;
5934     self.explain_mystery();
5935
5936     $*IN_PANIC--;
5937     die "Parse failed\n";
5938 }
5939
5940 regex is_ok {
5941     \N*? '#OK' \h*? $<okif>=[\N*?] \h*? $$
5942 }
5943
5944 method worry (Str $s) {
5945     my $m = $s ~ self.locmess;
5946
5947     # allow compile-time warning suppression with #OK some string
5948     my ($okmaybe) = self.suppose( sub {
5949         self.is_ok;
5950     });
5951     if $okmaybe {
5952         my $okif = $okmaybe<okif>.Str;
5953         return self if $okif eq '' or $s ~~ /$okif/;
5954     }
5955
5956     push @*WORRIES, $m unless %*WORRIES{$s}++;
5957     self;
5958 }
5959
5960 method sorry (Str $s) {
5961     self.deb("sorry $s") if $*DEBUG;
5962     note $Cursor::RED, '===', $Cursor::CLEAR, 'SORRY!', $Cursor::RED, '===', $Cursor::CLEAR, "\n"
5963         unless $*IN_SUPPOSE or $*FATALS++;
5964     if $s {
5965         my $m = $s;
5966         $m ~= self.locmess ~ "\n" unless $m ~~ /\n$/;
5967         if $*FATALS > 10 or $*IN_SUPPOSE {
5968             die $m;
5969         }
5970         else {
5971             note $m unless %*WORRIES{$m}++;
5972         }
5973     }
5974     self;
5975 }
5976
5977 # "when" arg assumes more things will become obsolete after Perl 6 comes out...
5978
5979 method obs (Str $old, Str $new, Str $when = ' in Perl 6') {
5980     %$*HIGHEXPECT = ();
5981     self.panic("Unsupported use of $old;$when please use $new");
5982 }
5983
5984 method sorryobs (Str $old, Str $new, Str $when = ' in Perl 6') {
5985     %$*HIGHEXPECT = ();
5986     self.sorry("Unsupported use of $old;$when please use $new");
5987     self;
5988 }
5989
5990 method worryobs (Str $old, Str $new, Str $when = ' in Perl 6') {
5991     self.worry("Unsupported use of $old;$when please use $new");
5992     self;
5993 }
5994
5995 method dupprefix (Str $bad) {
5996     my $c = substr($bad,0,1);
5997     self.panic("Expecting a term, but found either infix $bad or redundant prefix $c\n  (to suppress this message, please use space between $c $c)");
5998 }
5999
6000 method badinfix (Str $bad) {
6001     self.panic("Preceding context expects a term, but found infix $bad instead");
6002 }
6003
6004 # Since most keys are valid prefix operators or terms, this rule is difficult
6005 # to reach ('say »+«' works), but it's okay as a last-ditch default anyway.
6006 token term:sym<miscbad> {
6007     {} <!{ $*QSIGIL }>
6008     {{
6009         my ($bad) = $¢.suppose( sub {
6010             $¢.infixish;
6011         });
6012         $*HIGHWATER = -1;
6013         $*HIGHMESS = '';
6014         self.badinfix($bad.Str) if $bad;
6015     }}
6016     <!>
6017 }
6018
6019 ## vim: expandtab sw=4 ft=perl6