5 # Copyright 2008-2010, Larry Wall
7 # You may copy this software under the terms of the Artistic License,
8 # version 2.0 or later.
10 $SIG{INT} = sub { print STDERR Carp::longmess("Oops"); exit(1); };
14 no warnings 'recursion';
24 use feature 'say', 'state';
30 $::DEBUG //= 0 + ($ENV{STD5DEBUG} // 0);
33 use constant DEBUG => $::DEBUG;
34 our %LEXERS; # per language, the cache of lexers, keyed by rule name
35 our %FATECACHE; # fates we've already turned into linked lists
38 our %DFALEXERS = (); # per language (class name), then keyed again by rule
40 sub ::fatestr { my $f = shift;
43 $text .= $f->[1] . " " . $f->[2];
44 $text .= ' ' if $f = $f->[0];
62 use Scalar::Util 'refaddr';
65 our $BLUE = color 'blue';
66 our $GREEN = color 'green';
67 our $CYAN = color 'cyan';
68 our $MAGENTA = color 'magenta';
69 our $YELLOW = color 'yellow';
70 our $RED = color 'red';
71 our $CLEAR = color 'clear';
73 use LazyMap qw(lazymap eager);
74 use constant DEBUG => $::DEBUG;
78 # this prevents us from inheriting from Moose::Object, which saves a
79 # good 20 seconds on DESTROY/DEMOLISHALL
80 Moose::Meta::Class->create('CursorBase');
83 $::PERL6HERE = $ENV{PERL6HERE} // '⏏';
84 Encode::_utf8_on($::PERL6HERE);
86 binmode(STDIN, ":utf8");
87 binmode(STDERR, ":utf8");
88 binmode(STDOUT, ":utf8");
91 open(::LOG, ">&1") or die "Can't create $0.log: $!";
94 open(::LOG, ">$0.log") or die "Can't create $0.log: $!";
96 binmode(::LOG, ":utf8");
99 #############################################################
101 #############################################################
103 sub _PARAMS {} # overridden in parametric role packages
105 sub from :lvalue { $_[0]->{_from} //= $_[0]->{_pos} }
106 sub to { $_[0]->{_pos} }
107 sub pos :lvalue { $_[0]->{_pos} }
108 sub chars { $_[0]->{_pos} - ($_[0]->{_from} // $_[0]->{_pos}) }
109 sub Str { no warnings; exists $_[0]->{_from} && defined $_[0]->{_pos} ? substr($::ORIG, $_[0]->{_from}, $_[0]->{_pos} - $_[0]->{_from})//'' : '' }
110 sub xact { $_[0]->{_xact} // die "internal error: cursor has no xact!!!" }
111 sub orig { \$::ORIG }
112 sub WHAT { ref $_[0] || $_[0] }
115 sub caps { $_[0] && $_[0]->{'~CAPS'} ? @{$_[0]->{'~CAPS'}} : () }
116 sub chunks { die "unimpl" }
117 sub ast { exists $_[0]->{'_ast'} ? $_[0]->{'_ast'} : $_[0]->Str }
118 sub make { $_[0]->{'_ast'} = $_[1]; $_[0] }
121 bless { 'file' => $::FILE->{name}, 'pos' => $_[0]->{_pos} }, 'LABEL';
124 sub list { my $self = shift;
126 # can't just do this in numerical order because some might be missing
127 # and we don't know the max
128 for my $k (keys %$self) {
129 $result[$k] = $self->{$k} if $k =~ /^\d/;
134 sub hash { my $self = shift;
136 for my $k (keys %$self) {
137 $result{$k} = $self->{$k} if $k !~ /^[_\d~]/;
142 sub deb { my $self = shift;
143 my $pos = ref $self && defined $self->{_pos} ? $self->{_pos} : "?";
144 print ::LOG $pos,'/',$self->lineof($pos), "\t", $CTX, ' ', @_, "\n";
149 delete $self->{_fate};
150 delete $self->{_pos}; # EXPR blows up without this for some reason
151 delete $self->{_reduced};
152 for my $k (values %$self) {
154 if (ref $k eq 'ARRAY') {
157 $k2->clean if ref $k2;
173 delete $copy{_reduced};
175 my $text = STD::Dump(\%copy);
179 #############################################################
181 #############################################################
186 { no warnings; @::ORIG = unpack("U*", $::ORIG); }
187 $::MEMOS[@::ORIG] = undef; # memos kept by position
188 my %args = ('_pos' => 0, '_from' => 0);
191 $args{'_' . $name} = shift;
193 my $self = bless \%args, ref $class || $class;
194 $self->{_xact} = ['MATCH',0,0];
201 local $::FILE = { name => '(eval)' };
202 $class->initparse($text,@_);
208 local $::FILE = { name => $file };
210 my $tmp_prefix = $args{tmp_prefix} // $ENV{STD5PREFIX} // '';
211 local $::TMP_PREFIX = $tmp_prefix;
213 open(FILE, '<:utf8', $file) or die "Can't open $file: $!\n";
222 $result = $class->initparse($text,@_);
224 if ($::YOU_WERE_HERE) {
225 $result->you_were_here;
227 elsif ($file =~ /\.pm6?$/) {
228 $result->you_were_here;
233 ## method initparse ($text, :$rule = 'TOP', :$tmp_prefix = '', :$setting = 'CORE', :$actions = '')
238 my $rule = $args{rule} // 'TOP';
239 my $tmp_prefix = $args{tmp_prefix} // $ENV{STD5PREFIX} // '';
240 my $setting = $args{setting} // 'CORE';
241 my $actions = $args{actions} // '';
243 local $::TMP_PREFIX = $tmp_prefix;
244 local $::SETTINGNAME = $setting;
245 local $::ACTIONS = $actions;
246 local @::MEMOS = @::MEMOS;
248 local @::ACTIVE = ();
250 # various bits of info useful for error messages
251 local $::HIGHWATER = 0;
252 local $::HIGHMESS = '';
253 local $::HIGHEXPECT = {};
255 local $::LAST_NIBBLE = bless { firstline => 0, lastline => 0 }, 'Cursor';
256 local $::LAST_NIBBLE_MULTILINE = bless { firstline => 0, lastline => 0 }, 'Cursor';
257 local $::GOAL = "(eof)";
258 $text .= "\n" unless substr($text,-1,1) eq "\n";
259 $::ORIG = $text; # original string
261 my $result = $self->new()->$rule();
262 delete $result->{_xact};
264 # XXX here attach stuff that will enable :cont
272 my $syml = $::TMP_PREFIX . 'syml';
273 my $file = "$syml/$setting.syml";
275 bless($self->load_yaml_pad($setting),'Stash');
278 bless($self->load_perl_pad($setting),'Stash');
286 return $PADS{$setting} if $PADS{$setting};
287 my $file = "$setting.pad";
288 my $syml = $::TMP_PREFIX . 'syml';
289 my $store = "$syml/$setting.pad.store";
290 mkdir $syml unless -d $syml;
291 if (-f $store and -M $file and -M $file > -M $store) {
292 $PADS{$setting} = retrieve($store);
295 $PADS{$setting} = require $file;
296 store($PADS{$setting}, $store);
303 open my $fh, $file or die "Can't open $file: $!";
304 my $text = do { local $/; <$fh>; };
313 return $PADS{$setting} if $PADS{$setting};
314 my $syml = $::TMP_PREFIX . 'syml';
315 my $file = "$syml/$setting.syml";
316 my $store = "$syml/$setting.syml.store";
317 mkdir $syml unless -d $syml;
318 if (-f $store and -M $file and -M $file > -M $store) {
319 $PADS{$setting} = retrieve($store);
322 $PADS{$setting} = LoadFile($file);
323 store($PADS{$setting}, $store);
325 # say join ' ', sort keys %{ $PADS{$setting} };
331 $::YOU_WERE_HERE = $::CURPAD;
337 my $file = $::FILE->{name};
339 $file =~ s/(\.setting)?$/.syml/;
342 $file = $::TMP_PREFIX . "syml/" . $file;
345 if ($::YOU_WERE_HERE) {
347 $all->{SETTING} = $::YOU_WERE_HERE;
350 eval { $::UNIT->{'$?SETTING_ID'} = $STD::ALL->{SETTING}->id };
352 eval { $::UNIT->{'$?CORE_ID'} = $STD::ALL->{CORE}->id };
356 for my $key (keys %{$STD::ALL}) {
357 next if $key =~ /^MY:file<\w+\.setting>/ or $key eq 'CORE' or $key eq 'SETTING';
358 $all->{$key} = $STD::ALL->{$key};
363 my @parts = split('/',$file);
364 my $newfile = shift @parts;
366 mkdir $newfile unless -d $newfile;
367 $newfile .= '/' . shift @parts;
370 open(SETTING, ">$file") or die "Can't open new setting file $file: $!";
371 print SETTING Dump($all);
382 sub new { my $self = shift;
387 sub from { my $self = shift;
391 sub to { my $self = shift;
396 #############################################################
397 # Cursor transformations
398 #############################################################
400 sub cursor_xact { my $self = shift;
402 if (DEBUG & DEBUG::cursors) {
404 for (my $x = $self->{_xact}; $x; $x = $x->[-1]) {
408 $pedigree .= ($x->[-2] ? " - " : " + ") . $n;
410 $self->deb("cursor_xact $name$pedigree");
412 # doing this in place is slightly dangerous, but seems to work
413 $self->{_xact} = [$name,0,$self->{_xact}];
417 sub cursor_fresh { my $self = shift;
419 my $lang = @_ && $_[0] ? shift() : ref $self;
420 $self->deb("cursor_fresh lang $lang") if DEBUG & DEBUG::cursors;
421 @r{'_pos','_fate','_xact'} = @$self{'_pos','_fate','_xact'};
422 $r{_herelang} = $self->{_herelang} if $self->{_herelang};
423 bless \%r, ref $lang || $lang;
426 sub cursor_herelang { my $self = shift;
427 $self->deb("cursor_herelang") if DEBUG & DEBUG::cursors;
429 $r{_herelang} = $self;
435 delete $self->{_fate};
436 delete $_->{_xact} for @_;
440 sub cursor_bind { my $self = shift; # this is parent's match cursor
441 my $bindings = shift;
442 my $submatch = shift; # this is the submatch's cursor
443 $self->prepbind($submatch);
445 $self->deb("cursor_bind @$bindings") if DEBUG & DEBUG::cursors;
447 @caps = @{$self->{'~CAPS'}} if $self->{'~CAPS'}; # must copy elems
450 for my $binding (@$bindings) {
451 if (ref $r{$binding} eq 'ARRAY') {
452 push(@{$r{$binding}}, $submatch);
455 $r{$binding} = $submatch;
457 next if $binding eq 'PRE';
458 next if $binding eq 'POST';
459 push @caps, $binding, $submatch;
461 $r{'~CAPS'} = \@caps;
463 $submatch->{_from} = $r{_from} = $r{_pos};
464 $r{_pos} = $submatch->{_pos};
465 $r{_xact} = $self->{_xact};
466 bless \%r, ref $self; # return new match cursor for parent
469 sub cursor_fate { my $self = shift;
473 # $_[0] is now ref to a $trystate;
475 $self->deb("cursor_fate $pkg $name") if DEBUG & DEBUG::cursors;
477 my $lexer = $::LEXERS{ref $self}->{$name} // do {
479 $self->_AUTOLEXpeek($name,$retree);
481 if ($self->{_pos} >= $::HIGHWATER) {
482 if ($self->{_pos} > $::HIGHWATER) {
486 $::HIGHEXPECT->{$lexer->{DBA}}++;
487 $::HIGHWATER = $self->{_pos};
490 my $P = $self->{_pos};
495 $self->cursor_fate_dfa($pkg, $name, $lexer, $P);
498 sub cursor_fate_dfa {
499 my ($self, $pkg, $name, $lexer, $P) = @_;
501 my $state = $lexer->{S};
505 print ::LOG "=" x 10,"\n$p DFA for ${pkg}::$name in ", ref $self, "\n" if DEBUG & DEBUG::autolexer;
507 push @rfates, @{ $state->[0] // _jit_dfa_node($lexer, $state) };
508 if (DEBUG & DEBUG::autolexer) {
509 for (@{ $state->[0] }) {
511 for (my $f = $_; $f; $f = $f->[0]) {
514 print ::LOG " [adding fate @b]\n";
517 last if $p == @::ORIG;
518 my $chi = $::ORIG[$p++];
519 print ::LOG "--- ", pack("U", $chi), "\n" if DEBUG & DEBUG::autolexer;
520 if ($state->[1]{$chi}) {
521 $state = $state->[1]{$chi};
522 print ::LOG "specific -> ", $state->[1]{ID}, "\n"
523 if DEBUG & DEBUG::autolexer;
527 my $dt = $state->[2];
528 while (defined $dt) {
529 if (ref $dt eq 'ARRAY') {
530 if (DEBUG & DEBUG::autolexer) {
531 print ::LOG $dt->[2][-1],
532 (vec($dt->[2][$chi >> 10], $chi & 1023, 1) ?
533 "? yes\n" : "? no\n");
535 $dt = $dt->[vec($dt->[2][$chi >> 10], $chi & 1023, 1)];
537 print ::LOG " -> ", $$dt->[1]{ID}, "\n" if DEBUG & DEBUG::autolexer;
544 sub { @rfates ? pop(@rfates) : () };
548 my ($self, $pkg, $name, $lexer, $P) = @_;
549 # A rudimentary trie walker.
551 if (my $state = $lexer->{T}) {
554 my $ch = $::ORIG[$p]//32;
556 print ::LOG "=" x 10,"\n$p TRIE for ${pkg}::$name in ", ref $self, "\n",
557 $p," ", pack("U",$ch), "\n" if DEBUG & DEBUG::autolexer;
560 if ($state->{'~~'}) {
562 my @x = @{$state->{'~~'}};
563 while (my ($final,$fnum) = splice(@x,0,2)) {
566 print ::LOG $p," probing $fnum $final\n" if DEBUG & DEBUG::autolexer;
568 next unless &{$RXCACHE{$final} //= eval "sub { \$::ORIG =~ /\\G$final/xsgc }"};
569 $pend = pos($::ORIG);
571 push(@{$candidates[$pend - $P][$p - $P]}, $fnum);
572 print ::LOG $pend," FNUM $fnum @",$pend - $P,"\n" if DEBUG & DEBUG::autolexer;
575 { no warnings; last STATE unless $next = $state->{chr $ch}; };
576 if (DEBUG & DEBUG::autolexer) {
577 print ::LOG substr($::ORIG, $P, $p - $P), "\n";
578 print ::LOG $p," ", pack("U",$ch), "\n";
584 my $fates = $lexer->{FATES};
586 # Return longest, most-specific candidate or list of tied candidates.
587 # (If the latter, candidate list itself follows original rule order).
588 # This is essentially a lazy list of LTM candidates, batched by ties.
589 # Subsequent calls return next-most specific, then next-most longest.
591 while (@candidates) {
592 my $long = $candidates[-1];
595 my $specific = pop @$long // next;
596 if (DEBUG & DEBUG::autolexer) {
597 say ::LOG "relex sez ", join ' ', @$specific;
599 return @$fates[@$specific];
608 print ::LOG "FAILED trie at $P\n" if DEBUG & DEBUG::autolexer;
612 sub cursor_all { my $self = shift;
616 $self->deb("cursor_all from $fpos to $tpos") if DEBUG & DEBUG::cursors;
618 @r{'_from','_pos'} = ($fpos,$tpos);
620 bless \%r, ref $self;
623 sub makestr { my $self = shift;
624 $self->deb("maketext @_") if DEBUG & DEBUG::cursors;
630 sub cursor_tweak { my $self = shift;
633 if (DEBUG & DEBUG::cursors) {
634 my $peek = substr($::ORIG,$tpos,20);
637 $self->deb("cursor to $tpos --------->$GREEN$peek$CLEAR");
639 $self->{_pos} = $tpos;
640 return () if $tpos > @::ORIG;
645 sub cursor_incr { my $self = shift;
646 my $tpos = $self->{_pos} + 1;
648 $self->panic("Unexpected EOF") if $tpos > length($::ORIG);
649 if (DEBUG & DEBUG::cursors) {
650 my $peek = substr($::ORIG,$tpos,20);
653 $self->deb("cursor to $tpos --------->$GREEN$peek$CLEAR");
655 $self->{_pos} = $tpos;
656 return () if $tpos > @::ORIG;
661 sub cursor { my $self = shift;
664 $self->panic("Unexpected EOF") if $tpos > length($::ORIG);
665 if (DEBUG & DEBUG::cursors) {
666 my $peek = substr($::ORIG,$tpos,20);
669 $self->deb("cursor to $tpos --------->$GREEN$peek$CLEAR");
672 # $r{_from} = $self->{_pos} // 0;
675 bless \%r, ref $self;
678 sub cursor_force { my $self = shift;
681 $self->panic("Unexpected EOF") if $tpos > length($::ORIG);
682 if (DEBUG & DEBUG::cursors) {
683 my $peek = substr($::ORIG,$tpos,20);
686 $self->deb("cursor to $tpos --------->$GREEN$peek$CLEAR");
689 # $r{_from} = $self->{_pos} // 0;
690 $r{_pos} = $::HIGHWATER = $tpos;
692 bless \%r, ref $self;
695 sub cursor_rev { my $self = shift;
698 if (DEBUG & DEBUG::cursors) {
699 my $peek = substr($::ORIG,$fpos,20);
702 $self->deb("cursor_ref to $fpos --------->$GREEN$peek$CLEAR");
707 bless \%r, ref $self;
710 #############################################################
711 # Regex service routines
712 #############################################################
714 sub callm { my $self = shift;
716 my $class = ref($self) || $self;
721 if (DEBUG & DEBUG::callm_show_subnames) {
722 while (my @c = caller($lvl)) {
728 elsif ($s =~ /^Cursor(?:Base)?::/) {
731 elsif ($s =~ /^LazyMap::/) {
734 elsif ($s =~ /^\(eval\)/) {
738 $extralvl = $lvl unless $extralvl;
745 while (my @c = caller($lvl)) { $lvl++; }
747 my ($package, $file, $line, $subname, $hasargs) = caller(1);
753 $self->deb($name, " [", $file, ":", $line, "] $class") if DEBUG & DEBUG::trace_call;
754 if (DEBUG & DEBUG::callm_show_subnames) {
755 $RED . join(' ', reverse @subs) . $CLEAR . ':' x $extralvl;
763 return $_[0] unless DEBUG & DEBUG::trace_call;
765 warn "Returning non-Cursor: $self\n" unless exists $self->{_pos};
766 my ($package, $file, $line, $subname, $hasargs) = caller(1);
767 $self->deb($subname, " returning @{[$self->{_pos}]}");
771 sub _MATCHIFY { my $self = shift;
775 my $xact = $self->{_xact};
776 my @result = lazymap( sub { my $x = shift; $x->{_xact} = $xact; $x->_REDUCE($S, $name)->retm() }, @_);
785 sub _MATCHIFYr { my $self = shift;
790 # $var->{_from} = $self->{_from};
791 my $xact = $self->{_xact};
792 $var->{_xact} = $xact;
793 $var->_REDUCE($S, $name)->retm();
796 sub _SCANf { my $self = shift;
798 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
801 my $pos = $self->{_pos};
802 my $C = $self->cursor_xact("SCANf $pos");
805 lazymap( sub { $self->cursor($_[0])->retm() }, LazyRange->new($xact, $pos,$eos) );
808 sub _SCANg { my $self = shift;
810 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
811 my $pos = $self->{_pos};
813 my $C = $self->cursor_xact("SCANg $pos");
816 lazymap( sub { $C->cursor($_[0])->retm() }, LazyRangeRev->new($xact, $eos,$pos) );
819 sub _STARf { my $self = shift;
821 no warnings 'recursion';
823 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
825 my $pos = $self->{_pos};
826 my $C = $self->cursor_xact("SCANf $pos");
829 lazymap(sub { $_[0]->retm() },
831 LazyMap->new(sub { $C->_PLUSf($_[0]) }, $block));
834 sub _STARg { my $self = shift;
836 no warnings 'recursion';
838 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
840 my $pos = $self->{_pos};
841 my $C = $self->cursor_xact("STARg $pos");
842 # my $xact = $C->xact;
844 lazymap(sub { $_[0]->retm() }, reverse
846 $C->cursor($self->{_pos}),
851 sub _STARr { my $self = shift;
853 no warnings 'recursion';
855 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
856 my $pos = $self->{_pos};
857 my $prev = $self->cursor_xact("STARr $pos");
858 # my $xact = $prev->xact;
860 my $prev_pos = $prev->{_pos} // 0;
865 last if $prev->{_pos} == $eos;
866 my @matches = $block->($prev); # XXX shouldn't read whole list
868 last unless @matches;
869 my $first = $matches[0]; # no backtracking into block on ratchet
870 last if $first->{_pos} == $prev_pos;
871 $prev_pos = $first->{_pos};
875 $self->cursor($prev_pos)->retm();
878 sub _PLUSf { my $self = shift;
880 no warnings 'recursion';
882 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
884 my $pos = $self->{_pos};
885 my $x = $self->cursor_xact("PLUSf $pos");
888 # don't go beyond end of string
889 return () if $self->{_pos} == @::ORIG;
896 $self->cursor($_[0]->{_pos})->retm()
897 }, $x, LazyMap->new(sub { $x->_PLUSf($_[0]) }, $block)
903 sub _PLUSg { my $self = shift;
905 no warnings 'recursion';
907 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
909 my $pos = $self->{_pos};
910 my $C = $self->cursor_xact("PLUSg $pos");
911 # my $xact = $C->xact;
913 reverse eager($C->_PLUSf($block, @_));
916 sub _PLUSr { my $self = shift;
918 no warnings 'recursion';
920 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
924 my $pos = $self->{_pos};
925 my $to = $self->cursor_xact("PLUSr $pos");
926 # my $xact = $to->xact;
929 last if $to->{_pos} == $eos;
930 my @matches = $block->($to); # XXX shouldn't read whole list
931 last unless @matches;
932 my $first = $matches[0]; # no backtracking into block on ratchet
933 #$first->deb($matches->perl) if DEBUG;
937 return () unless @all;
938 $self->cursor($to->{_pos})->retm();
941 sub _REPSEPf { my $self = shift;
944 no warnings 'recursion';
946 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
949 # don't go beyond end of string
950 return () if $self->{_pos} == @::ORIG;
952 my $pos = $self->{_pos};
953 my $C = $self->cursor_xact("REPSEPf $pos");
954 # my $xact = $C->xact;
957 for my $x ($block->($C)) {
958 for my $s ($sep->($x)) {
959 push @result, lazymap(sub { $C->cursor($_[0]->{_pos}) }, $x, $s->_REPSEPf($sep,$block));
963 lazymap(sub { $_[0]->retm() }, @result);
966 sub _REPSEPg { my $self = shift;
969 no warnings 'recursion';
971 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
973 my $pos = $self->{_pos};
974 my $C = $self->cursor_xact("REPSEPg $pos");
975 # my $xact = $C->xact;
977 reverse eager($C->_REPSEPf($sep, $block, @_));
980 sub _REPSEPr { my $self = shift;
983 no warnings 'recursion';
985 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
989 my $pos = $self->{_pos};
990 my $to = $self->cursor_xact("REPSEPr $pos");
991 # my $xact = $C->xact;
994 last if $to->{_pos} == $eos;
995 my @matches = $block->($to); # XXX shouldn't read whole list
996 last unless @matches;
997 my $first = $matches[0]; # no backtracking into block on ratchet
998 #$first->deb($matches->perl) if DEBUG;
1000 my @seps = $sep->($first);
1005 return () unless @all;
1006 $self->cursor($all[-1]->{_pos})->retm;
1009 sub _OPTr { my $self = shift;
1011 no warnings 'recursion';
1013 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1015 my $pos = $self->{_pos};
1016 my $C = $self->cursor_xact("OPTr $pos");
1017 my $xact = $C->xact;
1019 my $x = ($block->($C))[0];
1020 my $r = $x // $C->cursor_tweak($pos);
1021 $r->{_xact} = $self->{_xact};
1025 sub _OPTg { my $self = shift;
1027 no warnings 'recursion';
1029 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1031 my $pos = $self->{_pos};
1032 my $C = $self->cursor_xact("OPTg $pos");
1033 # my $xact = $C->xact;
1035 my @x = $block->($C);
1037 lazymap(sub { $_[0]->retm() },
1039 $self->cursor($pos));
1042 sub _OPTf { my $self = shift;
1044 no warnings 'recursion';
1046 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1048 my $pos = $self->{_pos};
1049 my $C = $self->cursor_xact("OPTf $pos");
1050 # my $xact = $C->xact;
1052 lazymap(sub { $_[0]->retm() },
1053 $C->cursor($C->{_pos}),
1057 sub _BRACKET { my $self = shift;
1059 no warnings 'recursion';
1061 my $oldlang = ref($self);
1062 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1063 lazymap(sub { bless($_[0],$oldlang)->retm() },
1067 sub _BRACKETr { my $self = shift;
1069 no warnings 'recursion';
1071 my $oldlang = ref($self);
1072 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1073 my ($val) = $block->($self) or return ();
1074 bless($val,$oldlang)->retm();
1077 sub _PAREN { my $self = shift;
1079 no warnings 'recursion';
1081 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1082 lazymap(sub { $_[0]->retm() },
1086 sub _NOTBEFORE { my $self = shift;
1088 no warnings 'recursion';
1090 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1091 local $::HIGHEXPECT = {}; # don't count lookahead as expectation
1092 local $::HIGHWATER = $::HIGHWATER;
1094 @caps = @{$self->{'~CAPS'}} if $self->{'~CAPS'}; # must copy elems
1095 my @all = $block->($self);
1097 $self->{'~CAPS'} = \@caps;
1098 return $self->cursor($self->{_pos})->retm();
1101 sub _NOTCHAR { my $self = shift;
1103 no warnings 'recursion';
1105 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1106 my @all = $block->($self);
1108 return $self->cursor($self->{_pos}+1)->retm();
1111 sub before { my $self = shift;
1113 no warnings 'recursion';
1115 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1116 local $::HIGHEXPECT = {}; # don't count lookahead as expectation
1117 local $::HIGHWATER = $::HIGHWATER;
1119 @caps = @{$self->{'~CAPS'}} if $self->{'~CAPS'}; # must copy elems
1120 my @all = $block->($self);
1121 if (@all and $all[0]) {
1122 $all[0]->{'~CAPS'} = \@caps;
1123 if ($self->{_ast}) {
1124 $all[0]->{'_ast'} = $self->{_ast};
1127 delete $all[0]->{'_ast'};
1129 return $all[0]->cursor_all(($self->{_pos}) x 2)->retm();
1134 sub suppose { my $self = shift;
1136 no warnings 'recursion';
1138 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1139 local $::FATALS = 0;
1142 local $::HIGHWATER = -1;
1144 local $::HIGHEXPECT = {};
1145 local $::IN_SUPPOSE = 1;
1148 @all = $block->($self);
1150 lazymap( sub { $_[0]->retm() }, @all );
1153 sub after { my $self = shift;
1155 no warnings 'recursion';
1157 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1158 local $::HIGHEXPECT = {}; # don't count lookbehind as expectation
1159 my $end = $self->cursor($self->{_pos});
1161 @caps = @{$self->{'~CAPS'}} if $self->{'~CAPS'}; # must copy elems
1162 my @all = $block->($end); # Make sure $_->{_from} == $_->{_pos}
1163 if (@all and $all[0]) {
1164 $all[0]->{'~CAPS'} = \@caps;
1165 if ($self->{_ast}) {
1166 $all[0]->{'_ast'} = $self->{_ast};
1169 delete $all[0]->{'_ast'};
1171 return $all[0]->cursor_all(($self->{_pos}) x 2)->retm();
1176 sub null { my $self = shift;
1177 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1178 return $self->cursor($self->{_pos})->retm();
1181 sub ws__PEEK { ''; }
1185 local $CTX = $self->callm() if DEBUG & DEBUG::trace_call;
1186 my @stub = return $self if exists $::MEMOS[$self->{_pos}]{ws};
1188 my $S = $self->{_pos};
1189 my $C = $self->cursor_xact("RULE ws $S");
1190 # my $xact = $C->xact;
1192 $::MEMOS[$S]{ws} = undef; # exists means we know, undef means no ws before here
1194 $self->_MATCHIFY($S, 'ws',
1195 $C->_BRACKET( sub { my $C=shift;
1197 push @gather, (map { my $C=$_;
1200 $C->_NOTBEFORE( sub { my $C=shift;
1203 } $C->_COMMITRULE())
1204 } $C->before(sub { my $C=shift;
1207 } $C->before( sub { my $C=shift;
1208 $C->after(sub { my $C=shift;
1213 push @gather, (map { my $C=$_;
1215 scalar(do { $::MEMOS[$C->{_pos}]{ws} = $S unless $C->{_pos} == $S }, $C)
1216 } $C->_STARr(sub { my $C=shift;
1226 sub _ASSERT { my $self = shift;
1228 no warnings 'recursion';
1230 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1231 my @all = $block->($self);
1232 if ((@all and $all[0]->{_bool})) {
1233 return $self->cursor($self->{_pos})->retm();
1238 sub _BINDVAR { my $self = shift;
1241 no warnings 'recursion';
1243 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1244 lazymap(sub { $$var = $_[0]; $_[0]->retm() },
1248 sub _SUBSUME { my $self = shift;
1251 no warnings 'recursion';
1252 no warnings 'recursion';
1254 local $CTX = $self->callm($names ? "@$names" : "") if DEBUG & DEBUG::trace_call;
1255 lazymap(sub { $self->cursor_bind($names, $_[0])->retm() },
1256 $block->($self->cursor_fresh()));
1259 sub _SUBSUMEr { my $self = shift;
1262 no warnings 'recursion';
1263 no warnings 'recursion';
1265 local $CTX = $self->callm($names ? "@$names" : "") if DEBUG & DEBUG::trace_call;
1266 my ($var) = $block->($self->cursor_fresh()) or return ();
1267 $self->cursor_bind($names, $var)->retm();
1270 sub _EXACT_rev { my $self = shift;
1271 my $s = shift() // '';
1272 my @ints = unpack("U*", $s);
1274 local $CTX = $self->callm($s) if DEBUG & DEBUG::trace_call;
1275 my $P = $self->{_pos} // 0;
1277 return () unless ($::ORIG[--$P]//-1) == pop @ints;
1279 return $self->cursor($P)->retm();
1282 sub _EXACT { my $self = shift;
1283 my $s = shift() // '';
1284 my @ints = unpack("U*", $s);
1286 local $CTX = $self->callm($s) if DEBUG & DEBUG::trace_call;
1287 my $P = $self->{_pos} // 0;
1289 return () unless ($::ORIG[$P++]//-1) == shift @ints;
1291 return $self->cursor($P)->retm();
1292 # if (substr($::ORIG, $P, $len) eq $s) {
1293 # $self->deb("EXACT $s matched @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1294 # my $r = $self->cursor($P+$len);
1298 # $self->deb("EXACT $s didn't match @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1303 sub _PATTERN { my $self = shift;
1306 local $CTX = $self->callm($qr) if DEBUG & DEBUG::trace_call;
1307 my $P = $self->{_pos} // 0;
1309 if ($::ORIG =~ /$qr/gc) {
1310 my $len = pos($::ORIG) - $P;
1311 $self->deb("PATTERN $qr matched @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1312 my $r = $self->cursor($P+$len);
1316 $self->deb("PATTERN $qr didn't match at $P") if DEBUG & DEBUG::matchers;
1321 sub _BACKREFn { my $self = shift;
1324 local $CTX = $self->callm($n) if DEBUG & DEBUG::trace_call;
1325 my $P = $self->{_pos} // 0;
1326 my $s = $self->{$n}->Str;
1327 my $len = length($s);
1328 if (substr($::ORIG, $P, $len) eq $s) {
1329 $self->deb("EXACT $s matched @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1330 my $r = $self->cursor($P+$len);
1334 $self->deb("EXACT $s didn't match @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1339 sub _SYM { my $self = shift;
1343 $s = $s->[0] if ref $s eq 'ARRAY';
1345 local $CTX = $self->callm($s) if DEBUG & DEBUG::trace_call;
1346 my $P = $self->{_pos} // 0;
1347 my $len = length($s);
1349 ? lc substr($::ORIG, $P, $len) eq lc $s
1350 : substr($::ORIG, $P, $len) eq $s
1352 $self->deb("SYM $s matched @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1353 my $r = $self->cursor($P+$len);
1358 $self->deb("SYM $s didn't match @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1363 #sub _EXACT_rev { my $self = shift;
1366 # local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1367 # my $len = length($s);
1368 # my $from = $self->{_pos} - $len;
1369 # if ($from >= 0 and substr($::ORIG, $from, $len) eq $s) {
1370 # my $r = $self->cursor_rev($from);
1374 ## say "EXACT_rev $s didn't match @{[substr($!orig,$from,$len)]} at $from $len";
1379 sub _ARRAY { my $self = shift;
1380 local $CTX = $self->callm(0+@_) if DEBUG & DEBUG::trace_call;
1381 my $P = $self->{_pos} // 0;
1382 my @array = sort { length($b) <=> length($a) } @_; # XXX suboptimal
1384 for my $s (@array) {
1385 my $len = length($s);
1386 if (substr($::ORIG, $P, $len) eq $s) {
1387 $self->deb("ARRAY elem $s matched @{[substr($::ORIG,$P,$len)]} at $P $len") if DEBUG & DEBUG::matchers;
1388 my $r = $self->cursor($P+$len);
1389 push @result, $r->retm('');
1395 sub _ARRAY_rev { my $self = shift;
1396 local $CTX = $self->callm(0+@_) if DEBUG & DEBUG::trace_call;
1397 my @array = sort { length($b) <=> length($a) } @_; # XXX suboptimal
1399 for my $s (@array) {
1400 my $len = length($s);
1401 my $from = $self->{_pos} = $len;
1402 if (substr($::ORIG, $from, $len) eq $s) {
1403 $self->deb("ARRAY_rev elem $s matched @{[substr($::ORIG,$from,$len)]} at $from $len") if DEBUG & DEBUG::matchers;
1404 my $r = $self->cursor_rev($from);
1405 push @result, $r->retm('');
1411 sub _DIGIT { my $self = shift;
1412 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1413 my $P = $self->{_pos};
1414 my $char = substr($::ORIG, $P, 1);
1415 if ($char =~ /^\d$/) {
1416 my $r = $self->cursor($P+1);
1420 # say "DIGIT didn't match $char at $P";
1425 sub _DIGIT_rev { my $self = shift;
1426 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1427 my $from = $self->{_pos} - 1;
1429 # say "DIGIT_rev didn't match $char at $from";
1432 my $char = substr($::ORIG, $from, 1);
1433 if ($char =~ /^\d$/) {
1434 my $r = $self->cursor_rev($from);
1438 # say "DIGIT_rev didn't match $char at $from";
1443 sub ww { my $self = shift;
1444 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1445 my $P = $self->{_pos};
1446 return () unless $P;
1447 my $chars = substr($::ORIG, $P-1, 2);
1448 if ($chars =~ /^\w\w$/) {
1449 my $r = $self->cursor($P);
1453 # say "ww didn't match $chars at $P";
1458 sub _ALNUM { my $self = shift;
1459 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1460 my $P = $self->{_pos};
1461 my $char = substr($::ORIG, $P, 1);
1462 if ($char =~ /^\w$/) {
1463 my $r = $self->cursor($P+1);
1467 # say "ALNUM didn't match $char at $P";
1472 sub _ALNUM_rev { my $self = shift;
1473 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1474 my $from = $self->{_pos} - 1;
1476 # say "ALNUM_rev didn't match $char at $from";
1479 my $char = substr($::ORIG, $from, 1);
1480 if ($char =~ /^\w$/) {
1481 my $r = $self->cursor_rev($from);
1485 # say "ALNUM_rev didn't match $char at $from";
1493 for my $ch (0..255) {
1494 my $char = chr($ch);
1495 vec($alpha,$ch,1) = 1 if $char =~ /\w/ and $char !~ /\d/;
1498 sub alpha { my $self = shift;
1499 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1500 my $P = $self->{_pos};
1501 # my $char = substr($::ORIG, $P, 1);
1502 my $ch = $::ORIG[$P];
1503 if (vec($alpha,$ch,1) or ($ch > 255 and chr($ch) =~ /\pL/)) {
1504 # if ($char =~ /^[_[:alpha:]\pL]$/) {
1505 my $r = $self->cursor($P+1);
1509 # say "alpha didn't match $char at $P";
1514 sub alpha_rev { my $self = shift;
1515 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1516 my $from = $self->{_pos} - 1;
1520 my $char = substr($::ORIG, $from, 1);
1521 if ($char =~ /^[_[:alpha:]\pL]$/) {
1522 my $r = $self->cursor_rev($from);
1530 sub _SPACE { my $self = shift;
1531 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1532 my $P = $self->{_pos};
1533 my $char = substr($::ORIG, $P, 1);
1534 if ($char =~ /^\s$/) {
1535 my $r = $self->cursor($P+1);
1539 # say "SPACE didn't match $char at $P";
1544 sub _SPACE_rev { my $self = shift;
1545 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1546 my $from = $self->{_pos} - 1;
1548 # say "SPACE_rev didn't match $char at $from";
1551 my $char = substr($::ORIG, $from, 1);
1552 if ($char =~ /^\s$/) {
1553 my $r = $self->cursor_rev($from);
1557 # say "SPACE_rev didn't match $char at $from";
1562 sub _HSPACE { my $self = shift;
1563 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1564 my $P = $self->{_pos};
1565 my $char = substr($::ORIG, $P, 1);
1566 if ($char =~ /^[ \t\r]$/ or ($char =~ /^\s$/ and $char !~ /^[\n\f\0x0b\x{2028}\x{2029}]$/)) {
1567 my $r = $self->cursor($P+1);
1571 # say "HSPACE didn't match $char at $P";
1576 sub _HSPACE_rev { my $self = shift;
1577 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1578 my $from = $self->{_pos} - 1;
1580 # say "HSPACE_rev didn't match $char at $from";
1583 my $char = substr($::ORIG, $from, 1);
1584 if ($char =~ /^[ \t\r]$/ or ($char =~ /^\s$/ and $char !~ /^[\n\f\0x0b\x{2028}\x{2029}]$/)) {
1585 my $r = $self->cursor_rev($from);
1589 # say "HSPACE_rev didn't match $char at $from";
1594 sub _VSPACE { my $self = shift;
1595 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1596 my $P = $self->{_pos};
1597 my $char = substr($::ORIG, $P, 1);
1598 if ($char =~ /^[\n\f\x0b\x{2028}\x{2029}]$/) {
1599 my $r = $self->cursor($P+1);
1603 # say "VSPACE didn't match $char at $P";
1608 sub _VSPACE_rev { my $self = shift;
1609 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1610 my $from = $self->{_pos} - 1;
1612 # say "VSPACE_rev didn't match $char at $from";
1615 my $char = substr($::ORIG, $from, 1);
1616 if ($char =~ /^[\n\f\x0b\x{2028}\x{2029}]$/) {
1617 my $r = $self->cursor_rev($from);
1621 # say "VSPACE_rev didn't match $char at $from";
1626 sub _CCLASS { my $self = shift;
1629 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1630 my $P = $self->{_pos};
1631 my $char = substr($::ORIG, $P, 1);
1632 if ($char =~ /$cc/) {
1633 my $r = $self->cursor($P+1);
1637 # say "CCLASS didn't match $char at $P";
1642 sub _CCLASS_rev { my $self = shift;
1645 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1646 my $from = $self->{_pos} - 1;
1648 # say "CCLASS didn't match $char at $from";
1651 my $char = substr($::ORIG, $from, 1);
1652 if ($char =~ /$cc/) {
1653 my $r = $self->cursor_rev($from);
1657 # say "CCLASS didn't match $char at $from";
1662 sub _ANY { my $self = shift;
1663 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1664 my $P = $self->{_pos};
1666 $self->cursor($P+1)->retm();
1669 # say "ANY didn't match anything at $P";
1674 sub _ANY_rev { my $self = shift;
1675 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1676 my $from = $self->{_pos} - 1;
1680 return $self->cursor_rev($from)->retm();
1683 sub _BOS { my $self = shift;
1684 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1685 my $P = $self->{_pos};
1687 $self->cursor($P)->retm();
1693 sub _BOS_rev { $_[0]->_BOS }
1695 sub _BOL { my $self = shift;
1696 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1697 my $P = $self->{_pos};
1698 if ($P == 0 or substr($::ORIG, $P-1, 1) =~ /^[\n\f\x0b\x{2028}\x{2029}]$/) {
1699 $self->cursor($P)->retm();
1705 sub _BOL_rev { $_[0]->_BOL }
1707 sub _EOS { my $self = shift;
1708 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1709 my $P = $self->{_pos};
1710 if ($P == @::ORIG) {
1711 $self->cursor($P)->retm();
1717 sub _EOS_rev { $_[0]->_EOS }
1719 sub _EOL { my $self = shift;
1720 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1721 my $P = $self->{_pos};
1722 if ($P == @::ORIG or substr($::ORIG, $P, 1) =~ /^(?:\r\n|[\n\f\x0b\x{2028}\x{2029}])$/) {
1723 $self->cursor($P)->retm();
1729 sub _EOL_rev { $_[0]->_EOL }
1731 sub _RIGHTWB { my $self = shift;
1732 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1733 my $P = $self->{_pos};
1734 pos($::ORIG) = $P - 1;
1735 if ($::ORIG =~ /\w\b/) {
1736 $self->cursor($P)->retm();
1742 sub _RIGHTWB_rev { $_[0]->_RIGHTWB }
1744 sub _LEFTWB { my $self = shift;
1745 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1746 my $P = $self->{_pos};
1748 if ($::ORIG =~ /\b(?=\w)/) {
1749 $self->cursor($P)->retm();
1755 sub _LEFTWB_rev { $_[0]->_LEFTWB }
1757 sub _LEFTRESULT { my $self = shift;
1758 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1759 my $P = $self->{_pos};
1761 if ($::ORIG =~ /\b(?=\w)/) {
1762 $self->cursor($P)->retm();
1768 sub _LEFTRESULT_rev { $_[0]->_LEFTWB }
1770 sub _REDUCE { my $self = shift;
1774 $key .= ' ' . $_[0] if @_;
1776 $self->{_reduced} = $key;
1777 $self->{_from} = $S;
1779 eval { $::ACTIONS->$meth($self, @_) };
1780 warn $@ if $@ and not $@ =~ /locate/;
1782 $self->deb("REDUCE $key from " . $S . " to " . $self->{_pos}) if DEBUG & DEBUG::matchers;
1786 sub _COMMITBRANCH { my $self = shift;
1787 my $xact = $self->xact;
1788 # $self->{LAST} = shift() if @_;
1789 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1790 my $P = $self->{_pos};
1793 $self->deb("Commit $$xact[0] to $P") if DEBUG & DEBUG::matchers;
1794 return $self->cursor_xact("CB") if $xact->[0] =~ /^ALT/;
1795 $xact = $xact->[-1];
1797 die "Not in an alternation, so can't commit to a branch";
1800 sub _COMMITLTM { my $self = shift;
1801 my $xact = $self->xact;
1802 # $self->{LAST} = shift() if @_;
1803 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1804 my $P = $self->{_pos};
1807 $self->deb("Commit $$xact[0] to $P") if DEBUG & DEBUG::matchers;
1808 return $self->cursor_xact("CL") if $xact->[0] =~ /^ALTLTM/;
1809 $xact = $xact->[-1];
1811 die "Not in a longest token matcher, so can't commit to a longest token";
1814 sub _COMMITRULE { my $self = shift;
1815 my $xact = $self->xact;
1816 # $self->{LAST} = shift() if @_;
1817 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1818 my $P = $self->{_pos};
1821 $self->deb("Commit $$xact[0] to $P") if DEBUG & DEBUG::matchers;
1822 return $self->cursor_xact("CR") if $xact->[0] =~ /^RULE/;
1823 $xact = $xact->[-1];
1825 die "Not in a rule, so can't commit to rule";
1828 sub commit { my $self = shift;
1829 my $xact = $self->xact;
1830 # $self->{LAST} = shift() if @_;
1831 local $CTX = $self->callm if DEBUG & DEBUG::trace_call;
1832 my $P = $self->{_pos};
1835 $self->deb("Commit $$xact[0] to $P") if DEBUG & DEBUG::matchers;
1836 return $self->cursor_xact("CM") if $xact->[0] =~ /^MATCH/;
1837 $xact = $xact->[-1];
1839 die "Not in a match, so can't commit to match";
1842 sub fail { my $self = shift;
1847 sub bless { CORE::bless $_[1], $_[0]->WHAT }
1849 #############################################################
1850 # JIT lexer generator
1851 #############################################################
1854 ## array of (NFA node) ->
1855 ## 0: non extensible (imperative) flag
1856 ## 0: array of fate (array of fate element)
1857 ## array of (label) at odd index, new index at even
1859 ## each DFA node is array:
1860 ## 0: array of object fates
1861 ## 1: hash of specific cases (char => DFAnode)
1862 ## also carries some debug data
1863 ## 2n: reference to a uniprop hash
1864 ## 2n+1: DFAnode is that hash existed
1865 ## Labels: undef is epsilon link.
1866 ## otherwise list - 1 positive, 0+ negative
1867 ## each is: 1 character, else unicode prop in "Gc/L" form
1868 ## "DFA" lexer structure:
1869 ## {DFA} -> array of refs to all DFA nodes
1870 ## {DBA}, {FILE}, {NAME} same as "RE" lexer structure
1871 ## {S} -> ref to DFA root
1872 ## {NFA} -> NFA structure
1873 ## individual fates in the NFA end with a hook which can be 1 to stop adding
1874 ## fates on the end; it's not always possible to associate a unique fate with
1875 ## each NFA node, consider (a|b)*
1876 ## A NFA or DFA node is accepting if it has a nonempty list of fates
1880 package CursorBase::dfa;
1883 for (@$self) { @$_ = (); }
1887 # Steal data from Perl5's Unicode maps
1888 my %unicode_map_cache;
1889 $unicode_map_cache{ALL} = [scalar("\377" x 128) x 1088, "ALL"] ;
1890 sub _get_unicode_map {
1891 my $propname = shift;
1892 $unicode_map_cache{$propname} //= do {
1893 my @top = (("") x 1088, $propname);
1894 for my $l (split("\n", (do "unicore/lib/$propname.pl"))) {
1895 my ($from, $to) = split("\t", $l);
1897 $to = hex $to || $from;
1899 for (my $x = $from; $x <= $to; $x++) {
1900 vec($top[$x >> 10], $x & 1023, 1) = 1;
1907 # This is the fast path handling for JIT DFA lexer generation (although it gets
1908 # short-circuited if the DFALEXERS entry exists, later). The lexer generation
1909 # process sometimes recurses to this, which is tracked using %::AUTOLEXED; if
1910 # the value is already set, we need to suppress recursion.
1914 #sub _AUTOLEXpeekDFA { my $self = shift;
1916 # my $retree = shift;
1918 # $self->deb("AUTOLEXpeekDFA $key") if DEBUG & DEBUG::autolexer;
1919 # die "Null key" if $key eq '';
1920 # if ($::AUTOLEXED{$key}) { # no left recursion allowed in lexer!
1921 # die "Left recursion in $key" if $fakepos == $::AUTOLEXED{$key};
1922 # $self->deb("Suppressing lexer recursion on $key") if DEBUG & DEBUG::autolexer;
1923 # # Return a faked "immediate imperative" lexer, to end the token
1924 # # Note that this will only happen from inside RE_ast, so the other stuff
1925 # # that CursorBase itself uses isn't needed
1926 # return { NFA => [[1, [[1]]]] };
1928 # $key = 'termish' if $key eq 'EXPR';
1929 # return $::DFALEXERS{ref $self}->{$key} //=
1930 # $self->_AUTOLEXgenDFA($key, $retree);
1933 sub _dump_nfa { my ($name, $nfa) = @_;
1934 print ::LOG "--- BEGIN NFA DUMP ($name) ---\n";
1935 for my $ix (0 .. @$nfa-1) {
1937 for (my $j = 2; $j < @{ $nfa->[$ix] }; $j += 2) {
1938 push @go, "[" . join("-",@{$nfa->[$ix][$j] // []}) . "] => " . $nfa->[$ix][$j+1];
1940 my $h = sprintf "%4d: %-30s %s ", $ix, join(", ", @go),
1941 ($nfa->[$ix][0]{I} ? 'I' : ' ');
1942 my @fate = map { my @x = @$_;
1944 push @x, "..." if $y;
1945 join(" ", "-->", @x) } @{ $nfa->[$ix][1] };
1946 @fate = ('') if !@fate;
1948 print ::LOG $h . $_ . "\n";
1949 $h = ' ' x length($h);
1952 print ::LOG "---- END NFA DUMP ----\n";
1955 sub _dtree_dump { my ($ord, $dt) = @_;
1956 print ::LOG (" " x (2 + $ord));
1958 print ::LOG "END\n";
1959 } elsif (ref $dt ne 'ARRAY') {
1960 print ::LOG ($$dt)->[1]{ID}, "\n";
1962 print ::LOG $dt->[2][-1], "?\n";
1963 _dtree_dump($ord+1, $dt->[1]);
1964 _dtree_dump($ord+1, $dt->[0]);
1968 sub _dfa_dump_node { my ($dfan) = @_;
1970 my @gor = %{ $dfan->[1] };
1971 while (my ($a, $b) = splice @gor, 0, 2) {
1972 next if $a eq 'DESC';
1974 push @go, "'" . ::qm(chr $a) . "' => " . $b->[1]{ID};
1976 printf ::LOG "%-30s %-30s\n", $dfan->[1]{DESC} . ":", join(", ", @go);
1977 _dtree_dump(0, $dfan->[2]);
1978 for (@{ $dfan->[0] }) {
1980 for (my $fate = $_; $fate; $fate = $fate->[0]) {
1981 push @arr, $fate->[1], $fate->[2];
1983 print ::LOG " --> ", join(" ", @arr), "\n";
1987 sub _elem_matches { my ($char, $element) = @_;
1988 if (length($element) == 1) {
1989 return $char eq $element;
1992 return vec(_get_unicode_map($element)->[$i >> 10], $i & 1023, 1);
1996 my %boolean_tables = map { $_, 1 } qw/AHex Alpha BidiC BidiM CE CI CWCF CWCM
1997 CWKCF CWL CWT CWU Cased CompEx DI Dash Dep Dia Ext GrBase GrExt Hex Hyphen
1998 IDC IDS IDSB IDST Ideo JoinC Lower Math NChar NFDQC OAlpha ODI OGrExt OIDC
1999 OIDS OLower OMath OUpper PatSyn PatWS QMark Radical SD STerm Space Term
2000 UIdeo Upper VS XIDC XIDS/;
2001 sub _elem_excludes { my ($up1, $up2) = @_;
2002 my ($t1, $v1) = split "/", $up1;
2003 my ($t2, $v2) = split "/", $up2;
2005 return 0 if $t1 ne $t2;
2006 return 0 if $v1 eq $v2;
2008 return 1 if $boolean_tables{$t1};
2009 return 1 if $t1 eq 'Gc' && (length($v1) == length($v2)
2010 || substr($v1, 0, 1) ne substr($v2, 0, 1));
2015 sub _elem_implies { my ($up1, $up2) = @_;
2016 my ($t1, $v1) = split "/", $up1;
2017 my ($t2, $v2) = split "/", $up2;
2019 return 0 if $t1 ne $t2;
2020 return 1 if $v1 eq $v2;
2022 return 1 if $t1 eq 'Gc' && substr($v1, 0, 1) eq $v2;
2027 sub _elem_dich { my ($up1, $up2) = @_;
2028 my ($t1, $v1) = split "/", $up1;
2029 my ($t2, $v2) = split "/", $up2;
2031 return 0 if $t1 ne $t2;
2032 return 0 if $v1 eq $v2;
2034 return 1 if $boolean_tables{$t1};
2039 # for my $x (\*_elem_implies, \*_elem_excludes, \*_elem_dich) {
2041 # my $z = *$x{NAME};
2044 # my $ret = $y->(@args);
2045 # print STDERR "$z : @args => $ret\n";
2050 sub _decision_tree { my ($thunk, @edges) = @_;
2053 TERM: for (my $i = 0; $i < @edges; $i += 2) {
2054 for my $c (@{ $edges[$i] }) {
2055 next if $c eq 'ALL';
2061 if (defined $branch) {
2065 for (my $i = 0; $i < @edges; $i += 2) {
2066 my ($p, @n) = @{ $edges[$i] };
2068 if (!_elem_excludes($branch, $p) &&
2069 !(grep { _elem_implies($branch, $_) } @n)) {
2070 my $pp = _elem_implies($branch, $p) ? 'ALL' : $p;
2071 my @nn = grep { !_elem_excludes($branch, $_) } @n;
2072 push @true, [ $pp, @nn ], $edges[$i+1];
2075 if (!_elem_implies($p, $branch) &&
2076 !(grep { _elem_dich($branch, $_) } @n)) {
2077 my $pp = _elem_dich($branch, $p) ? 'ALL' : $p;
2078 my @nn = grep { !_elem_implies($_, $branch) } @n;
2079 push @false, [ $pp, @nn ], $edges[$i+1];
2083 return [ _decision_tree($thunk, @false),
2084 _decision_tree($thunk, @true),
2085 _get_unicode_map($branch) ];
2087 # all edges are labelled [ALL]
2089 for (my $i = 1; $i < @edges; $i += 2) {
2090 vec($bm, $edges[$i], 1) = 1;
2092 return ($bm ne '') ? (\ $thunk->($bm)) : undef;
2096 sub _tangle_edges { my ($our_edges, $thunk) = @_;
2100 for (my $i = 0; $i < @$our_edges; $i += 2) {
2101 next unless $our_edges->[$i];
2102 for (@{ $our_edges->[$i] }) {
2103 if (length($_) == 1) {
2104 $used_chars{$_} = 1;
2111 # First, all specifically mentioned characters are floated to the initial
2114 for my $ch (keys %used_chars) {
2116 EDGE: for (my $i = 0; $i < @$our_edges; $i += 2) {
2117 next unless $our_edges->[$i];
2118 next unless _elem_matches($ch, $our_edges->[$i][0]);
2119 for (my $j = 1; $j < @{ $our_edges->[$i] }; $j++) {
2120 next EDGE if _elem_matches($ch, $our_edges->[$i][$j]);
2122 vec($bm, $our_edges->[$i+1], 1) = 1;
2124 $next_1{ord $ch} = $thunk->($bm);
2127 # Now clean them out so the decision tree engine doesn't have to deal with
2129 $our_edges = [ @$our_edges ];
2130 for (my $i = 0; $i < @$our_edges; ) {
2131 if (!$our_edges->[$i] || length($our_edges->[$i][0]) == 1) {
2132 splice @$our_edges, $i, 2;
2134 $our_edges->[$i] = [grep { length($_) > 1 } @{ $our_edges->[$i] }];
2139 \%next_1, _decision_tree($thunk, @$our_edges);
2142 sub _jit_dfa_node { my ($lexer, $node) = @_;
2143 my $nfa2dfa = sub { my $nbm = shift;
2144 $lexer->{NFA2DFA}->{$nbm} //= do {
2146 $node[1] = { ID => scalar(@{ $lexer->{DFA} }), BITS => $nbm };
2147 push @{ $lexer->{DFA} }, \@node;
2152 my $bf = $node->[1]{BITS};
2153 my $id = $node->[1]{ID};
2154 my $nfa = $lexer->{NFA};
2157 my @nfixes = grep { vec($bf, $_, 1) } (0 .. length($bf)*8 - 1);
2162 my $nix = pop @grey;
2163 next if $black{$nix};
2165 my $nfn = $nfa->[$nix];
2167 push @{ $node->[0] }, @{ $nfn->[1] };
2168 for (my $i = 2; $i < @$nfn; $i += 2) {
2170 push @grey, $nfn->[$i+1];
2172 push @ouredges, $nfn->[$i], $nfn->[$i+1];
2177 for my $fate (@{ $node->[0] }) {
2178 my @a = reverse @$fate;
2181 for (my $i = 1; $i < @a; $i += 3) {
2183 $fo = [ $fo, $a[$i+2], $a[$i+1] ];
2188 @{ $node->[0] } = map { $_->[1] } sort { $b->[0] cmp $a->[0] } @{ $node->[0] };
2191 push @$node, _tangle_edges(\@ouredges, $nfa2dfa);
2192 $node->[1]{DESC} = $id . "{" . join(",", @nfixes) . "}";
2193 $node->[1]{ID} = $id;
2195 if (DEBUG & DEBUG::autolexer) {
2196 print ::LOG "JIT DFA node generation:\n";
2197 _dfa_dump_node($node);
2203 sub _AUTOLEXgenDFA { my ($self, $key, $retree) = @_;
2204 local $::AUTOLEXED{$key} = $fakepos;
2206 my $ast = $retree->{$key};
2207 my $dba = $ast->{dba};
2209 my $d = DEBUG & DEBUG::autolexer;
2210 print ::LOG "generating DFA lexer for $key -->\n" if $d;
2213 if ($key =~ /(.*):\*$/) {
2216 my $protopat = $1 . '__S_';
2217 my $protolen = length($protopat);
2220 my @stack = ref $self;
2224 my $class = pop @stack;
2225 push @stack, reverse @{ $class . "::ISA" };
2226 my $stash = \%{ $class . "::" };
2227 my @methods = sort grep {
2228 substr ($_, 0, $protolen) eq $protopat &&
2229 substr($_, -6, 6) eq '__PEEK';
2231 for my $method (@methods) {
2232 my $callname = $class . '::' . $method;
2233 $method = substr($method, 0, length($method)-6);
2234 my $peeklex = $self->$callname();
2235 die "$proto has no lexer!?" unless $peeklex->{NFA};
2237 push @pat, ::nfaltmprefate($proto, "${class}::$method",
2238 $j++, $peeklex->{NFA});
2242 $nfa = ::nfadisj(@pat);
2244 $nfa = $ast->nfa($self);
2249 die "dba unspecified" unless $dba;
2251 _dump_nfa($key, $nfa) if $d;
2253 my $dfa = CORE::bless [], 'CursorBase::dfa';
2254 push @$dfa, [ undef, { BITS => "\001", ID => 0 } ];
2256 { DBA => $dba, DFA => $dfa, NFA2DFA => { "\001" => $dfa->[0] },
2257 NFA => $nfa, S => $dfa->[0] };
2260 sub lexers { my $self = shift;
2261 my $lang = ref $self;
2262 $self->deb("LANG = $lang") if DEBUG & DEBUG::autolexer;
2263 $::LEXERS{$lang} //= {};
2266 sub _AUTOLEXpeek { my $self = shift;
2270 $self->deb("AUTOLEXpeek $key") if DEBUG & DEBUG::autolexer;
2271 die "Null key" if $key eq '';
2272 if ($::AUTOLEXED{$key}) { # no left recursion allowed in lexer!
2273 die "Left recursion in $key" if $fakepos == $::AUTOLEXED{$key};
2274 $self->deb("Suppressing lexer recursion on $key") if DEBUG & DEBUG::autolexer;
2275 return { NFA => [[{I=>1}, [[1]]]] }; # (but if we advanced just assume a :: here)
2277 $key = 'termish' if $key eq 'EXPR';
2278 return $::LEXERS{ref $self}->{$key} //= do {
2279 $self->_AUTOLEXgenDFA($key, $retree);
2284 sub _AUTOLEXgen { my $self = shift;
2288 my $lang = ref $self;
2289 if ($lang =~ /^ANON/) {
2291 my $super = join '+', @{ref($self) . '::ISA'};
2292 my $category = ${ref($self) . '::CATEGORY'} // '.';
2293 print ::LOG "AUTOLEXgen $key in $lang from $super without $category\n" if DEBUG & DEBUG::mixins;
2294 my $superlexer = $self->cursor_fresh($super)->_AUTOLEXpeek($key,$retree);
2296 for my $pat (@{$superlexer->{PATS}}) {
2297 if ($pat =~ / $category /) {
2298 print ::LOG "\tNope: $pat\n" if DEBUG & DEBUG::mixins;
2303 # no need to regen a sublexer that will turn out the same
2304 return $superlexer if $same;
2306 $self->deb("AUTOLEXgen $key in $lang") if DEBUG & DEBUG::autolexer;
2308 (my $dir = $::TMP_PREFIX . 'lex::' . $lang) =~ s/::/\//g;
2309 (my $file = $key) =~ s/::/-/g;
2312 my $dba = $retree->{$key}{dba};
2315 $dba =~ s/_0[01]$//;
2316 $dba =~ s/_(\d\d)$/ (alt $1)/;
2320 my $cache_key = "$dir/$file.store";
2321 my $cached_lexer = $lexer_cache{$cache_key};
2322 if ($STORABLE and -e $cache_key) {
2324 if ($cached_lexer) {
2325 #Cache hit, Keep them coming ;-)
2326 $lexer = $cached_lexer;
2327 # say "GOT HERE $cache_key";
2331 $lexer_cache{$cache_key} = $lexer = retrieve($cache_key);
2333 my $pat = $lexer->{PATS};
2338 if ( m/\(\?#FATE(\d+) +(.*?)\)/) {
2339 warn "MISMATCH $i $1" unless $i == $1;
2343 die "Whoops, no fate in storage";
2346 if ($fate = $FATECACHE{$fstr}) {
2347 $fates->[$i] = $fate;
2350 $FATECACHE{$fstr} = $fate = $fates->[$i] = [0,0,0];
2351 while ($fstr =~ s/(\S+)\s+(\S+)\s*//) {
2354 if ($fate->[0] = $FATECACHE{$fstr}) {
2357 $fate = $fate->[0] //= [0,0,0] if $fstr ne '';
2362 $lexer->{FATES} = $fates;
2365 elsif (open(LEX, "$dir/$file")) {
2366 binmode(LEX, ":utf8");
2367 $self->deb("using cached $dir/$file") if DEBUG & DEBUG::autolexer;
2369 chomp($name = <LEX>);
2374 $lexer{NAME} = $name;
2376 $lexer{FILE} = "$dir/$file";
2377 my @pat = split(/\n/, $para[0]);
2378 $lexer{PATS} = \@pat;
2382 s/\(\?#FATE\d* +(.*?)\)/(?#FATE$i $1)/;
2384 my $fate = $fates->[$i] = [0,0,0];
2385 while ($fstr =~ s/(\S+)\s+(\S+)\s*//) {
2388 $fate = $fate->[0] = [0,0,0] if $fstr ne '';
2392 $lexer{FATES} = $fates;
2394 $lexer{T} = Load($para[1]) if $TRIE and @para > 1;
2400 { package RE_base; 1; }
2402 my $oldfakepos = $::AUTOLEXED{$key} // 0;
2403 $::AUTOLEXED{$key} = $fakepos;
2404 my $ast = $retree->{$key};
2405 if ($ast and ref $ast ne 'HASH') {
2406 @pat = $ast->longest($self);
2408 else { # a protomethod, look up all methods it can call
2410 if ($proto =~ s/:\*$//) {
2411 my $protopat = $proto . '__S_';
2412 my $protolen = length($protopat);
2414 for my $class ($self->meta->linearized_isa) {
2415 for my $method (sort $class->meta->get_method_list) {
2416 if (substr($method,0,$protolen) eq $protopat) {
2417 next if $method =~ /__PEEK/;
2418 my $callname = $class . '::' . $method . '__PEEK';
2419 my $peeklex = $self->$callname();
2420 if ($peeklex and $peeklex->{PATS}) {
2421 my @alts = @{$peeklex->{PATS}};
2422 for my $alt (@alts) {
2423 $alt .= "\t(?#FATE)" unless $alt =~ /FATE/;
2424 $alt =~ s/\(\?#FATE\d*/(?#FATE $proto ${class}::$method/;
2438 s/(\t\(\?#FATE.*?\))(.*)/$2$1/;
2439 s/(\(\?#::\))+/(?#::)/;
2445 if ( s/\(\?#FATE\d* +(.*?)\)/(?#FATE$i $1)/) {
2449 $_ .= "\t(?#FATE$i )";
2453 if ($fate = $FATECACHE{$fstr}) {
2454 $fates->[$i] = $fate;
2457 $FATECACHE{$fstr} = $fate = $fates->[$i] = [0,0,0];
2458 while ($fstr =~ s/(\S+)\s+(\S+)\s*//) {
2461 if ($fate->[0] = $FATECACHE{$fstr}) {
2464 $fate = $fate->[0] //= [0,0,0] if $fstr ne '';
2469 warn "(null pattern for $key)" unless @pat;
2470 my $pat = join("\n", @pat);
2472 $::AUTOLEXED{$key} = $oldfakepos;
2478 for my $fnum (0..@pat-1) {
2479 my ($chars) = $pat[$fnum];
2480 $chars =~ s/\(\?#::\)//g;
2483 while ($chars ne '') {
2484 last if $chars =~ m/^\t/;
2485 if ($chars =~ s/^\\(\W)([*+?{]?)//) {
2487 $final = "\\$1$2$chars";
2490 push(@chars, ord($1));
2493 if ($chars =~ s/^(\\\w.*)//) {
2497 if ($chars =~ /^(\[\S+)/) {
2501 if ($chars =~ /^(\.\S*)/) {
2505 if ($chars =~ s/^(.)([*+?{]?)//) {
2507 $final = "$1$2$chars";
2510 push(@chars, unpack('U',$1));
2515 for my $ch (@chars) {
2516 my $char = chr($ch);
2517 if (my $next = $state->{$char}) {
2521 $state = $state->{$char} = {};
2524 push @{$state->{'~~'}}, $final, $fnum;
2529 $lexer = { "NAME" => $name, "FILE" => "$dir/$file", "PATS" => [@pat], "FATES" => $fates, "T" => $T, "DBA" => $dba};
2531 return $lexer if $lang =~ /ANON/;
2534 use File::Path 'mkpath';
2538 delete $lexer->{FATES};
2539 store($lexer, "$dir/$file.store");
2540 $lexer->{FATES} = $fates;
2541 if ($file eq 'termish') {
2542 copy("$dir/termish.store","$dir/EXPR.store")
2543 or croak "Could not copy $dir/termish.store";
2547 open(my $cache, '>:utf8', "$dir/$file") // die "Can't print: $!";
2548 print $cache $name,"\n";
2549 print $cache join("\n",@pat),"\n\n" or die "Can't print: $!";
2550 print $cache Dump($T) if $T and not $STORABLE;
2551 close($cache) or die "Can't close: $!";
2552 $self->deb("regenerated $dir/$file") if DEBUG & DEBUG::autolexer;
2553 # force operator precedence method to look like a term
2554 if ($file eq 'termish') {
2555 copy("$dir/termish","$dir/EXPR")
2556 or croak "Could not copy $dir/termish";
2562 #############################################################
2563 # Parser service routines
2564 #############################################################
2569 @$self{keys %args} = values %args;
2575 my $O = $self->{O} or return ();
2577 for (my ($k,$v) = each %$O) {
2585 my $traitname = shift;
2589 $text = $param->[0]->Str;
2590 $text =~ s/^<(.*)>$/$1/ or
2591 $text =~ s/^\((.*)\)$/$1/;
2593 if ($traitname eq 'export') {
2594 if (defined $text) {
2600 $self->set_export($text);
2603 elsif (defined $text) {
2614 my $textpkg = $text . '::';
2615 my $name = $::DECLARAND->{name};
2616 my $xpad = $STD::ALL->{ (delete $::DECLARAND->{xpad})->[0] };
2617 $::DECLARAND->{export} = $text;
2618 my $sid = $::CURPAD->idref;
2619 my $x = $xpad->{'EXPORT::'} //= Stash::->new( 'PARENT::' => $sid, '!id' => [$sid->[0] . '::EXPORT'] );
2620 $x->{$textpkg} //= Stash::->new( 'PARENT::' => $x->idref, '!id' => [$sid->[0] . '::EXPORT::' . $text] );
2621 $x->{$textpkg}->{$name} = $::DECLARAND;
2622 $x->{$textpkg}->{'&'.$name} = $::DECLARAND
2623 if $name =~ /^\w/ and $::IN_DECL ne 'constant';
2629 my $WHAT = ref($self)||$self;
2632 my $NEWWHAT = $WHAT . '::';
2634 for my $mixin (@mixins) {
2635 my $ext = ref($mixin) || $mixin;
2637 $ext =~ s/(\w)\w*::/$1/g; # just looking for a "cache" key, really
2638 $NEWWHAT .= '_X_' . $ext;
2640 $self->deb("mixin $NEWWHAT from $WHAT @newmix") if DEBUG & DEBUG::mixins;
2642 if (not exists &{$NEWWHAT.'::meta'}) { # never composed this one yet?
2643 # fake up mixin with MI, being sure to put "roles" in front
2644 my $eval = "package $NEWWHAT; use Moose ':all' => { -prefix => 'moose_' }; moose_extends('$WHAT'); moose_with(" . join(',', map {"'$_'"} @newmix) . ");our \$CATEGORY = '.';\n";
2645 $self->deb($eval) if DEBUG & DEBUG::mixins;
2649 return $self->cursor_fresh($NEWWHAT);
2654 my $class = ref $self;
2658 $self->deb("Calling $class" . '::multitweak') if DEBUG & DEBUG::mixins;
2659 &{$class . '::multitweak'}($self,@_);
2661 return $retval if $retval;
2662 die $@ unless $@ =~ /^NOMATCH|^Undefined subroutine/;
2663 last unless $class =~ s/(.*)::.*/$1/;
2667 sub clean_id { my $self = shift;
2668 my ($id,$name) = @_;
2669 my $file = $::FILE->{name};
2672 $id =~ s/^MY:file<CORE.setting>.*?::/CORE::/;
2673 $id =~ s/^MY:file<\w+.setting>.*?::/SETTING::/;
2674 $id =~ s/^MY:file<\Q$file\E>$/UNIT/;
2675 $id =~ s/:pos\(\d+\)//;
2680 # remove consistent leading whitespace (mutates text nibbles in place)
2682 sub trim_heredoc { my $doc = shift;
2683 my ($stopper) = $doc->stopper or
2684 $doc->panic("Couldn't find delimiter for heredoc\n");
2685 my $ws = $stopper->{ws}->Str;
2686 return $stopper if $ws eq '';
2689 $wsequiv =~ s{^(\t+)}[' ' x (length($1) * ($::TABSTOP // 8))]xe;
2691 # We can't use ^^ after escapes, since the escape may be mid-line
2692 # and we'd get false positives. Use a fake newline instead.
2693 $doc->{nibbles}[0] =~ s/^/\n/;
2695 for (@{$doc->{nibbles}}) {
2696 next if ref $_; # next unless $_ =~ Str;
2698 # prefer exact match over any ws
2699 s{(?<=\n)(\Q$ws\E|[ \t]+)}{
2701 if ($white eq $ws) {
2705 $white =~ s[^ (\t+) ][ ' ' x (length($1) * ($::TABSTOP // 8)) ]xe;
2706 if ($white =~ s/^\Q$wsequiv\E//) {
2715 $doc->{nibbles}[0] =~ s/^\n//; # undo fake newline
2719 sub add_categorical { my $lang = shift;
2722 $name =~ s/:<<(.*)>>$/:«$1»/;
2723 my $WHAT = ref $lang;
2725 # :() is a signature modifier, not an operator
2726 if ($name =~ /^\w+:\(/) {
2727 # XXX canonicalize sig here eventually
2728 $lang->add_my_name($name);
2732 if ($name =~ s/^(\w+):(?=[«<{[])/$1:sym/) {
2734 my ($sym) = $name =~ /:sym(.*)/;
2735 $sym =~ s/^<\s*(.*\S)\s*>$/<$1>/g;
2736 $sym =~ s/^\[\s*(.*\S)\s*\]$/$1/g;
2737 if ( $sym =~ s/\\x\[(.*)\]/\\x{$1}/g) {
2738 $sym = '«' . eval($sym) . '»';
2740 elsif ($sym =~ s/\\c\[(.*)\]/\\N{$1}/g ) {
2741 $sym = '«' . eval("use charnames ':full'; $sym") . '»';
2744 # unfortunately p5 doesn't understand q«...»
2745 if ($sym =~ s/^«\s*(.*\S)\s*»$/$1/) {
2747 for my $try (qw( ' / ! : ; | + - = )) {
2748 $ok = $try, last if index($sym,$try) < 0;
2750 $sym = $ok . $sym . $ok;
2753 my $canon = substr($sym,1,length($sym)-2);
2754 $canon =~ s/([<>])/\\$1/g;
2755 my $canonname = $cat . ':<' . $canon . '>';
2756 $lang->add_my_name($canonname);
2759 $sym = '[qw' . $sym . ']';
2765 my $rule = "token $name { <sym> }";
2767 # produce p5 method name
2769 $mangle =~ s/^(\w*):(sym)?//;
2772 if ($mangle =~ s/^<(.*)>$/$1/ or
2773 $mangle =~ s/^«(.*)»$/$1/) {
2774 $mangle =~ s/\\(.)/$1/g;
2775 @list = $mangle =~ /(\S+)/g;
2777 elsif ($mangle =~ s/^\[(.*)\]$/$1/ or
2778 $mangle =~ s/^\{(.*)\}$/$1/) {
2779 $mangle =~ s/\\x\[(.*)\]/\\x{$1}/g;
2780 @list = eval $mangle;
2782 elsif ($mangle =~ m/^\(\"(.*)\"\)$/) {
2788 $mangle = ::mangle(@list);
2789 $mangle = $category . '__S_' . sprintf("%03d",$GEN++) . $mangle;
2791 # XXX assuming no equiv specified, but to do that right,
2792 # this should be delayed till the block start is seen
2794 if ($name =~ /^infix:/) {
2795 $coercion = 'additive';
2797 elsif ($name =~ /^prefix:/) {
2798 if ($sym =~ /^.\W/) {
2799 $coercion = 'symbolic_unary';
2802 $coercion = 'named_unary';
2805 elsif ($name =~ /^postfix:/) {
2806 $coercion = 'methodcall';
2808 elsif ($name =~ /^circumfix:/) {
2811 elsif ($name =~ /^postcircumfix:/) {
2812 $coercion = 'methodcall';
2814 elsif ($name =~ /^term:/) {
2818 state $genpkg = 'ANON000';
2824 use Moose ':all' => { -prefix => 'moose_' };
2825 moose_extends('$WHAT');
2830 '$mangle' => bless({
2844 our \$CATEGORY = '$category';
2846 sub ${mangle}__PEEK { \$_[0]->_AUTOLEXpeek('$mangle',\$retree) }
2849 local \$CTX = \$self->callm() if \$::DEBUG & DEBUG::trace_call;
2851 my \$sym = \$args{sym} // $sym;
2853 my \$xact = ['RULE $mangle', 0, \$::XACT];
2854 local \$::XACT = \$xact;
2856 my \$S = \$self->{_pos};
2857 my \$C = \$self->cursor_xact("RULE $mangle \$S");
2858 # my \$xact = \$C->xact;
2860 \$C->{'sym'} = \$sym;
2862 \$self->_MATCHIFY(\$S, '$mangle',
2864 if (my (\$C) = (\$C->_SYM(\$sym, 0))) {
2865 \$C->_SUBSUMEr(['O'], sub {
2867 \$C->O(%STD::$coercion)
2888 my $starter = $list[0];
2889 my $stopper = $list[1];
2893 use Moose ':all' => { -prefix => 'moose_' };
2894 moose_extends('$WHAT');
2899 '$mangle' => bless({
2906 'dba' => '$category expression',
2914 'dba' => '$category expression',
2923 'dba' => '$category expression',
2932 'dba' => '$category expression',
2935 'name' => 'semilist',
2945 'dba' => '$category expression',
2953 'dba' => '$category expression',
2962 'name' => 'FAILGOAL',
2971 'rest' => '(|%term)',
2978 our \$CATEGORY = '$category';
2980 sub ${mangle}__PEEK { \$_[0]->_AUTOLEXpeek('$mangle',\$retree) }
2982 no warnings 'recursion';
2984 local \$::CTX = \$self->callm() if \$::DEBUG & DEBUG::trace_call;
2986 local \$::sym = \$args{sym} // $sym;
2987 return () if \$::GOAL eq $starter;
2989 my \$C = \$self->cursor_xact("RULE $mangle");
2990 my \$xact = \$C->xact;
2991 my \$S = \$C->{'_pos'};
2992 \$C->{'sym'} = ref \$sym ? join(' ', \@\$sym) : \$sym;
2994 \$self->_MATCHIFYr(\$S, "$mangle",
2996 if (my (\$C) = (\$C->_EXACT($starter))) {
2998 if (my (\$C) = (((local \$::GOAL = $stopper , my \$goalpos = \$C), \$C)[-1])) {
3000 if (my (\$C) = (\$C->_SUBSUMEr(['semilist'], sub {
3005 if (my (\$C) = (\$C->_BRACKETr(sub {
3008 my \$C = \$C->cursor_xact('ALT ||');
3009 my \$xact = \$C->xact;
3012 push \@gather, \$C->_EXACT($stopper)
3016 push \@gather, \$C->FAILGOAL($stopper , '$category expression',\$goalpos)};
3020 \$C->_SUBSUMEr(['O'], sub {
3022 \$C->O(%STD::$coercion)
3050 $lang->deb("derive $genpkg from $WHAT adding $mangle") if DEBUG & DEBUG::mixins;
3051 eval $e or die "Can't create $name: $@\n";
3052 $::LANG{'MAIN'} = $lang->cursor_fresh($genpkg);
3053 my $oldlexer = $::LEXERS{$WHAT};
3054 my $newlexer = $::LEXERS{$genpkg} //= {};
3055 print ::LOG "=====================================\nADD $rule => $mangle\n" if DEBUG & DEBUG::mixins;;
3056 for my $name (sort keys %{$oldlexer}) {
3057 print ::LOG " $name:\n" if DEBUG & DEBUG::mixins;
3059 for my $pat (@{$oldlexer->{$name}->{PATS}}) {
3060 if ($pat =~ / $category /) {
3061 print ::LOG "\t$pat\n" if DEBUG & DEBUG::mixins;
3066 # no need to regen a sublexer that will turn out the same
3067 $newlexer->{$name} = $oldlexer->{$name} if $same;
3073 sub add_enum { my $self = shift;
3076 return unless $type;
3077 return unless $expr;
3078 my $typename = $type->Str;
3079 local $::IN_DECL = 'constant';
3080 # XXX complete kludge, really need to eval EXPR
3081 $expr =~ s/:(\w+)<\S+>/$1/g; # handle :name<string>
3082 for ($expr =~ m/([a-zA-Z_]\w*)/g) {
3083 $self->add_name($typename . "::$_");
3084 $self->add_name($_);
3089 sub do_use { my $self = shift;
3094 $self->do_need($module);
3095 $self->do_import($module,$args);
3099 sub do_need { my $self = shift;
3101 my $module = $m->Str;
3102 my $modfile = $module;
3105 my $std = -x 'std' ? './std' : 'std';
3106 if (not @::PERL6LIB) {
3107 if ($ENV{PERL6LIB}) {
3108 @::PERL6LIB = split ':', $ENV{PERL6LIB}
3111 @::PERL6LIB = qw( ./lib . );
3114 $modfile =~ s/::/\//g;
3116 for my $d (@::PERL6LIB) {
3117 if (-f "$d/$modfile.pm6") {
3122 elsif (-f "$d/$modfile.pm") {
3125 open PM, "$d/$modfile.pm" or next;
3128 next if $pm =~ /^\s*package\s+\w+\s*;/m; # ignore p5 code
3135 my $syml = $::TMP_PREFIX . 'syml';
3136 mkdir $syml unless -d $syml;
3139 $ext = '.pm6' unless -f "$syml/$modfile$ext.syml";
3140 if (-f "$syml/$modfile$ext.syml") {
3141 $topsym = LoadFile("$syml/$modfile$ext.syml");
3142 $self->worry("Can't locate module $module, only its symbol table file");
3145 $self->worry("Can't locate module $module");
3148 elsif (-f "$syml/$modfile$ext.syml" and -M "$lib/$modfile$ext" > -M "$syml/$modfile$ext.syml") {
3149 $topsym = LoadFile("$syml/$modfile$ext.syml");
3151 elsif (-f "$lib/$modfile$ext") {
3152 # say "$std $lib/$module$ext";
3153 system "$std $lib/$module$ext" and die "Can't compile $lib/$module$ext";
3154 $topsym = LoadFile("$syml/$modfile$ext.syml");
3157 $self->worry("Module $module disappeared during load");
3159 $self->add_my_name($module);
3160 $::DECLARAND->{really} = $topsym;
3164 sub do_import { my $self = shift;
3168 my $module = $m->Str;
3169 if ($module =~ /(class|module|role|package)\s+(\S+)/) {
3173 my $pkg = $self->find_stash($module);
3174 if ($pkg->{really}) {
3175 $pkg = $pkg->{really}->{UNIT};
3178 $pkg = $self->find_stash($module . '::');
3181 my $text = $args->Str;
3182 return $self unless $text;
3183 while ($text =~ s/^\s*:?(OUR|MY|STATE|HAS|AUGMENT|SUPERSEDE)?<(.*?)>,?//) {
3184 my $scope = lc($1 // 'my');
3186 local $::SCOPE = $scope;
3187 @imports = split ' ', $imports;
3192 eval { @tagimports = keys %{ $pkg->{'EXPORT::'}->{$_} }; };
3193 $self->do_import_aliases($pkg, @tagimports);
3195 elsif ($pkg->{$_}{export}) {
3196 $self->add_my_name($_, $pkg->{$_});
3198 elsif ($pkg->{'&'.$_}{export}) {
3200 $self->add_my_name($_, $pkg->{$_});
3202 elsif ($pkg->{$_}) {
3203 $self->worry("Can't import $_ because it's not exported by $module");
3208 $self->add_my_name($_);
3214 return $self unless $pkg;
3215 eval { @imports = keys %{ $pkg->{'EXPORT::'}->{'DEFAULT::'} }; };
3216 local $::SCOPE = 'my';
3217 $self->do_import_aliases($pkg, @imports);
3223 sub do_import_aliases {
3226 # say "attempting to import @_";
3229 next if /^PARENT::/;
3231 $self->add_my_name($_, $pkg->{$_});
3236 sub canonicalize_name { my $self = shift;
3238 $name =~ s/^([\$\@\%\&])(\^|:(?!:))/$1/;
3239 $name =~ s/\b:[UD_]$//;
3240 return $name unless $name =~ /::/;
3241 $self->panic("Can't canonicalize a run-time name at compile time: $name") if $name =~ /::\(/;
3242 $name =~ s/^([\$\@%&][!*=?:^.]?)(.*::)(.*)$/$2<$1$3>/;
3244 if ($name =~ s/::<(.*)>$//) {
3247 my @components = split(/(?<=::)/, $name, -1);
3248 shift(@components) while @components and $components[0] eq '';
3249 if (defined $vname) {
3250 $components[-1] .= '::' if @components and $components[-1] !~ /::$/;
3251 push(@components, $vname) if defined $vname;
3256 sub lookup_dynvar { my $self = shift;
3259 if ($name =~ s/^\$\?/::/) {
3260 return $$name if defined $$name;
3262 elsif ($name =~ s/^\@\?/::/) {
3263 return \@$name if defined *$name{ARRAY};
3265 elsif ($name =~ s/^\%\?/::/) {
3266 return \%$name if defined *$name{HASH};
3271 sub mark_sinks { my $self = shift;
3272 my $statements = shift;
3273 return $self unless @$statements;
3274 my @s = @$statements;
3275 my $final = pop(@s);
3278 $self->worry("Useless use of " . $s->Str . " in sink context");
3280 $s->{_pure} = 1; # nothing is pure :)
3286 sub is_pure { my $self = shift;
3287 return 1 if $self->{_pure};
3292 sub check_old_cclass { my $self = shift;
3293 my $innards = shift;
3295 my $prev = substr($::ORIG,$self->{_pos}-length($innards)-4,2);
3296 return $self if $prev =~ /=\s*$/; # don't complain on $var = [\S] capture
3309 $neg = '-' if $i =~ s/^\^//;
3313 if ($i =~ s/^-(.)/$1/) {
3314 $singleok = $doubleok = 0;
3315 $cclass .= $last ? '..' : '\\-';
3318 elsif ($i =~ /^\|./ and $cclass ne '') {
3319 return $self; # probable alternation
3321 elsif ($i =~ s/^\|//) {
3323 $singleok = $doubleok = 0;
3326 elsif ($i =~ /^[*+?]/ and $cclass ne '') {
3327 return $self; # probable quantifier
3329 elsif ($i =~ s/^\\?'//) {
3331 $single .= '\\' . $last;
3335 elsif ($i =~ s/^\\?"//) {
3338 $double .= '\\' . $last;
3341 elsif ($i =~ s/^(\\[btnrf0])//) {
3342 $last = eval '"' . $1 . '"';
3347 elsif ($i =~ s/(\\x\w\w)//) {
3348 $last = eval '"' . $1 . '"';
3353 elsif ($i =~ s/(\\0[0-7]{1,3})//) {
3354 $last = eval '"' . $1 . '"';
3356 $double .= "\\o" . substr($1,1);
3357 $cclass .= "\\o" . substr($1,1);
3359 elsif ($i =~ s/^(\\[sSwWdD])//) {
3360 $singleok = $doubleok = 0;
3364 elsif ($i =~ s/^(\\?\t)//) {
3370 elsif ($i =~ s/^(\\?\x20)//) {
3376 elsif ($i =~ s/^\.//) {
3378 $singleok = $doubleok = 0;
3381 elsif ($i =~ s/^\\(.)//) {
3384 $double .= '\\' . $last;
3385 $cclass .= '\\' . $last;
3387 elsif ($i =~ s/^(.)//s) {
3397 if ($last ne '' and $seen{$last}++) {
3398 return $self; # dup likely indicates not a character class
3402 my $common = "[$innards] appears to be an old-school character class;";
3404 # XXX not Unicodey yet
3406 return $self->worry("$common non-digits should be matched with \\D instead") if $cclass eq '\\d';
3407 return $self->worry("$common non-newlines should be matched with \\N instead") if $cclass eq '\\n';
3409 return $self->worry("$common non-(horizontal whitespace) should be matched with \\H instead") if $single =~ /\A[ \t\b\r]*\z/;
3410 return $self->worry("$common non-(vertical whitespace) should be matched with \\V instead") if $single =~ /\A[\n\f]*\z/;
3411 return $self->worry("$common non-whitespace should be matched with \\S instead") if $single =~ /\A[ \t\b\r\n\f]*\z/;
3412 return $self->worry("$common please use <-[$cclass]> if you mean a character class");
3415 return $self->worry("$common please use <-[$cclass]> if you mean a character class");
3419 return $self->worry("$common digits should be matched with \\d instead") if $cclass eq '\\d';
3421 return $self->worry("$common horizontal whitespace should be matched with \\h instead") if $single =~ /\A[ \t\b\r]*\z/;
3422 return $self->worry("$common vertical whitespace should be matched with \\v instead") if $single =~ /\A[\n\f]*\z/;
3423 return $self->worry("$common whitespace should be matched with \\s instead") if $single =~ /\A[ \t\b\r\n\f]*\z/;
3425 if ($singleok and $single eq $double) {
3426 return $self->worry("$common please use <[$cclass]> if you\n mean a character class, or quote it like '$single' to match\n string as a unit");
3429 return $self->worry("$common please use <[$cclass]> if you\n mean a character class, or quote it like \"$double\" to match\n string as a unit");
3433 return $self->worry("$common please use <${neg}[$cclass]> if you mean a character class");
3436 return $self->worry("$common please use <${neg}[$cclass]> if you\n mean a character class, or put whitespace inside like [ $innards ] to disable\n this warning");
3441 ## vim: expandtab sw=4