3 Perl6Regex - compiler and parser for Perl 6 regex
7 =item C<compile_perl6regex(PMC source, PMC adverbs :slurpy :named)>
9 Return the result of compiling C<source> according to Perl 6
10 regex syntax and the associated C<adverbs>. Normally this
11 function is obtained using C<compreg 'PGE::Perl6Regex'> instead
12 of calling it directly.
14 Returns the compiled regular expression. If a C<target>
15 named parameter is supplied, then it will return the parse tree
16 (target='parse'), the expression tree (target='exp'),
17 or the resulting PIR code (target='PIR').
21 .namespace [ 'PGE::Perl6Regex' ]
23 .sub 'compile_perl6regex'
25 .param pmc args :slurpy
26 .param pmc adverbs :slurpy :named
28 unless null adverbs goto set_adverbs
32 $I0 = exists adverbs['grammar']
33 if $I0 goto with_grammar
34 unless args goto adverb_grammar_1
36 adverbs['grammar'] = $S0
39 adverbs['grammar'] = 'PGE::Grammar'
41 $I0 = exists adverbs['name']
43 unless args goto with_name
47 $I0 = exists adverbs['lang']
49 adverbs['lang'] = 'PIR'
51 $I0 = exists adverbs['ignorecase']
52 if $I0 goto with_ignorecase
54 adverbs['ignorecase'] = $I0
56 $I0 = exists adverbs['sigspace']
57 if $I0 goto with_sigspace
58 $I0 = exists adverbs['s']
60 $I0 = exists adverbs['words']
61 if $I0 goto with_words
63 adverbs['sigspace'] = $I0
67 adverbs['sigspace'] = $I0
70 $I0 = adverbs['words']
71 adverbs['sigspace'] = $I0
75 target = adverbs['target']
76 target = downcase target
78 ## If we're passed the results of a previous parse, use it.
80 $I0 = isa source, 'PGE::Match'
81 if $I0 == 0 goto parse
83 if null $P0 goto parse
84 $I0 = isa $P0, 'PGE::Exp'
85 if $I0 == 0 goto parse
90 ## Let's parse the source as a regex
91 $P0 = get_global 'regex'
92 match = $P0(source, adverbs :flat :named)
93 if source == '' goto err_null
94 if target != 'parse' goto check
98 unless match goto check_1
101 if $S0 == $S1 goto analyze
111 pad['lexscope'] = $P0
112 exp = exp.'perl6exp'(pad)
113 if null exp goto err_null
114 .return exp.'compile'(adverbs :flat :named)
118 'parse_error'(match, $I0, 'Null pattern illegal')
122 =item C<regex(PMC mob, PMC adverbs :slurpy :named)>
124 Parses a regex according to Perl 6 regex syntax, and returns
125 the corresponding parse tree.
131 .param pmc adverbs :slurpy :named
134 .local pmc stopstack, optable, match
136 stopstack = get_global '@!stopstack'
137 optable = get_global '$optable'
139 stop = adverbs['stop']
141 match = optable.'parse'(mob, 'stop'=>stop)
149 Initializes the Perl6Regex parser and other data structures
150 needed for compiling regexes.
154 .include 'cclass.pasm'
156 .namespace [ 'PGE::Perl6Regex' ]
158 .sub '__onload' :load
160 p6meta = new 'P6metaclass'
161 p6meta.'new_class'('PGE::Exp::WS', 'parent'=>'PGE::Exp::Subrule')
162 p6meta.'new_class'('PGE::Exp::Alias', 'parent'=>'PGE::Exp')
165 optable = new 'PGE::OPTable'
166 set_global '$optable', optable
168 $P0 = get_global 'parse_term'
169 optable.newtok('term:', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
171 $P0 = get_global 'parse_term_ws'
172 optable.newtok('term:#', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
174 $P0 = get_global 'parse_term_backslash'
175 optable.newtok("term:\\", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
177 optable.newtok('term:^', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
178 optable.newtok('term:^^', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
179 optable.newtok('term:$$', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
180 optable.newtok('term:\b', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
181 optable.newtok('term:\B', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
182 optable.newtok('term:<<', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
183 optable.newtok('term:>>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
184 optable.newtok(unicode:"term:\xab", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
185 optable.newtok(unicode:"term:\xbb", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
187 optable.newtok('term:.', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
188 optable.newtok('term:\d', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
189 optable.newtok('term:\D', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
190 optable.newtok('term:\s', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
191 optable.newtok('term:\S', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
192 optable.newtok('term:\w', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
193 optable.newtok('term:\W', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
194 optable.newtok('term:\N', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
195 optable.newtok('term:\n', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Newline')
197 $P0 = get_global 'parse_dollar'
198 optable.newtok('term:$', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
200 $P0 = get_global 'parse_subrule'
201 optable.newtok('term:<', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
202 optable.newtok('term:<?', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
203 optable.newtok('term:<!', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
204 optable.newtok('term:<.', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
206 $P0 = get_global 'parse_enumcharclass'
207 optable.newtok('term:<[', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
208 optable.newtok('term:<+', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
209 optable.newtok('term:<-', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
210 optable.newtok('term:<![', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
212 $P0 = get_global 'parse_quoted_literal'
213 optable.newtok("term:'", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
215 optable.newtok('term:::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
216 optable.newtok('term::::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
217 optable.newtok('term:<cut>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
218 optable.newtok('term:<commit>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
220 $P0 = get_global 'parse_closure'
221 optable.newtok("term:{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
223 $P0 = get_global 'parse_action'
224 optable.newtok("term:{*}", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
227 optable.newtok('circumfix:[ ]', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Group')
228 optable.newtok('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CGroup')
230 $P0 = get_global 'parse_quant'
231 optable.newtok('postfix:*', 'looser'=>'term:', 'parsed'=>$P0)
232 optable.newtok('postfix:+', 'equiv'=>'postfix:*', 'parsed'=>$P0)
233 optable.newtok('postfix:?', 'equiv'=>'postfix:*', 'parsed'=>$P0)
234 optable.newtok('postfix::', 'equiv'=>'postfix:*', 'parsed'=>$P0)
235 optable.newtok('postfix:**', 'equiv'=>'postfix:*', 'parsed'=>$P0)
236 $P0 = get_global 'parse_quant_error'
237 optable.newtok('term:*', 'equiv'=>'term:', 'parsed'=>$P0)
238 optable.newtok('term:+', 'equiv'=>'term:', 'parsed'=>$P0)
239 optable.newtok('term:?', 'equiv'=>'term:', 'parsed'=>$P0)
241 optable.newtok('infix:', 'looser'=>'postfix:*', 'assoc'=>'list', 'nows'=>1, 'match'=>'PGE::Exp::Concat')
242 optable.newtok('infix:&', 'looser'=>'infix:', 'nows'=>1, 'match'=>'PGE::Exp::Conj')
243 optable.newtok('infix:|', 'looser'=>'infix:&', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
244 optable.newtok('prefix:|', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
245 optable.newtok('infix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
246 optable.newtok('prefix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
248 optable.newtok('infix::=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
249 optable.newtok('infix:=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
251 $P0 = get_global 'parse_modifier'
252 optable.newtok('prefix::', 'looser'=>'infix:|', 'nows'=>1, 'parsed'=>$P0)
254 optable.newtok('close:}', 'precedence'=>'<', 'nows'=>1)
258 set_global '%esclist', esclist
263 esclist['v'] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
264 esclist['h'] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
265 esclist['n'] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029"
266 # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
268 # Create and store closure preprocessors in %closure_pp
270 set_hll_global ['PGE::Perl6Regex'], '%closure_pp', $P0
271 $P1 = get_hll_global ['PGE::Perl6Regex'], 'PIR_closure'
274 # Create an array for holding stop tokens
275 $P0 = new 'ResizablePMCArray'
276 set_hll_global ['PGE::Perl6Regex'], '@!stopstack', $P0
278 $P0 = get_global 'compile_perl6regex'
279 compreg 'PGE::Perl6Regex', $P0
284 =item C<parse_term(PMC mob [, PMC adverbs :slurpy :named])>
286 Parses literal strings and whitespace.
287 Return a failed match if the stoptoken is found.
293 .param pmc adverbs :slurpy :named
296 .local int pos, lastpos
297 $P0 = getattribute mob, '$.target'
299 $P0 = getattribute mob, '$.pos'
301 lastpos = length target
304 $P0 = get_hll_global ['PGE::Perl6Regex'], '@!stopstack'
307 $I0 = is_cclass .CCLASS_WHITESPACE, target, pos
310 if $I0 == 0 goto not_stop
311 $S0 = substr target, pos, $I0
312 if $S0 == stop goto end_noterm
314 ## find length of word character sequence
316 $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
319 ## if we didn't find any, return no term
320 if litlen == 0 goto end_noterm
322 ## for multi-char unquoted literals, leave the last character
323 ## in case it's quantified (it gets processed as a subsequent term)
324 if litlen < 2 goto term_literal
327 $S0 = substr target, pos, litlen
329 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
330 mob.'result_object'($S0)
335 .return 'parse_term_ws'(mob)
338 (mob) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
343 =item C<parse_term_backslash(mob [, adverbs :slurpy :named])>
345 Parses terms beginning with backslash.
349 .sub 'parse_term_backslash'
351 .param pmc adverbs :slurpy :named
354 .local int pos, lastpos
355 $P0 = getattribute mob, '$.target'
357 $P0 = getattribute mob, '$.pos'
359 lastpos = length target
361 .local string initchar
362 initchar = substr target, pos, 1
363 $I0 = is_cclass .CCLASS_WORD, initchar, 0
364 if $I0 goto term_metachar
367 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
368 mob.'result_object'(initchar)
374 isnegated = is_cclass .CCLASS_UPPERCASE, initchar, 0
375 ## $S0 = downcase FIXME: RT# 48108
379 if $S0 == 'x' goto scan_xdo
380 if $S0 == 'o' goto scan_xdo
381 $P0 = get_global '%esclist'
382 $I0 = exists $P0[$S0]
383 if $I0 == 0 goto err_reserved_metachar
385 .local string charlist
387 if isnegated goto term_charlist
388 $I0 = length charlist
389 if $I0 > 1 goto term_charlist
392 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
393 mob.'result_object'(charlist)
398 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
399 mob.'result_object'(charlist)
400 mob['isnegated'] = isnegated
406 .local int base, decnum, isbracketed
408 base = index ' o d x', $S0
410 $S0 = substr target, pos, 1
411 isbracketed = iseq $S0, '['
414 $S0 = substr target, pos, 1
415 $I0 = index '0123456789abcdef', $S0
416 if $I0 < 0 goto scan_xdo_char_end
417 if $I0 >= base goto scan_xdo_char_end
421 goto scan_xdo_char_loop
425 unless isbracketed goto scan_xdo_end
426 if $S0 == ']' goto scan_xdo_end
427 if $S0 == '' goto err_missing_bracket
428 if $S0 != ',' goto err_bracketed
429 if isnegated goto err_negated_brackets
432 goto scan_xdo_char_loop
435 if isnegated goto term_charlist
438 err_reserved_metachar:
439 parse_error(mob, pos, 'Alphanumeric metacharacters are reserved')
441 parse_error(mob, pos, 'Missing close bracket for \\x[...] or \\o[...]')
443 parse_error(mob, pos, 'Invalid digit in \\x[...] or \\o[...]')
444 err_negated_brackets:
445 parse_error(mob, pos, 'Cannot use comma in \\X[...] or \\O[...]')
449 =item C<parse_term_ws(PMC mob)>
451 Parses a whitespace term.
458 .local int pos, lastpos
459 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
460 lastpos = length target
463 ## scan for the next non-whitespace character
464 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
465 $S0 = substr target, pos, 1
466 if $S0 != '#' goto end
467 ## we have a #-comment, determine its closing delimiter
469 .local string closedelim
471 $S0 = substr target, pos, 1
472 $I0 = index '<[{(', $S0
473 if $I0 < 0 goto term_ws_loop_1
474 closedelim = substr '>]})', $I0, 1
476 $I0 = index target, closedelim, pos
478 if pos > 0 goto term_ws_loop
486 =item C<parse_quant(PMC mob)>
488 Parses a quantifier, such as *, +, ?, :, and all of their wonderous
497 .local int pos, lastpos
499 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant')
500 lastpos = length target
502 .local int min, max, suffixpos
506 suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
508 if key == '**' goto quant_suffix
509 if key == ':' goto quant_cut
510 if key == '+' goto quant_max
511 ## quantifier is '?' or '*'
514 if key == '?' goto quant_suffix
515 ## quantifier is '+' or '*'
520 # The postfix:<:> operator may bring us here when it's really a
521 # term:<::> term. So, we check for that here and fail this match
522 # if we really have a cut term.
523 if key != ':' goto quant_suffix
524 $S0 = substr target, pos, 1
525 if $S0 == ':' goto end
526 mob['backtrack'] = PGE_BACKTRACK_NONE
529 suffix = substr target, suffixpos, 2
530 if suffix == ':?' goto quant_eager
531 if suffix == ':!' goto quant_greedy
533 suffix = substr target, suffixpos, 1
534 if suffix == '?' goto quant_eager
535 if suffix == '!' goto quant_greedy
536 if suffix != ':' goto quant
538 mob['backtrack'] = PGE_BACKTRACK_NONE
539 goto quant_skip_suffix
541 mob['backtrack'] = PGE_BACKTRACK_EAGER
542 goto quant_skip_suffix
544 mob['backtrack'] = PGE_BACKTRACK_GREEDY
547 pos = suffixpos + $I0
550 if key != '**' goto quant_set
552 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
554 isconst = is_cclass .CCLASS_NUMERIC, target, pos
555 if isconst goto brace_skip
556 $S0 = substr target, pos, 1
557 if $S0 != "{" goto err_closure
560 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
561 if $I1 <= pos goto err_closure
562 $S0 = substr target, pos
566 $S0 = substr target, pos, 2
567 if $S0 != '..' goto quant_closure_end
570 $S0 = substr target, pos, 1
571 if $S0 != '*' goto quant_range_end
573 goto quant_closure_end
575 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
576 if $I1 <= pos goto err_closure
577 $S0 = substr target, pos
581 if isconst goto brace_skip2
582 $S0 = substr target, pos, 1
583 if $S0 != "}" goto err_closure
586 suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
596 parse_error(mob, pos, "Error in closure quantifier")
600 =item C<parse_quant_error(mob)>
602 Throw an exception for quantifiers in term position.
606 .sub 'parse_quant_error'
610 parse_error(mob, pos, "Quantifier follows nothing in regex")
614 =item C<parse_dollar(PMC mob)>
616 Parse things that begin with a dollar sign, such as scalars,
617 anchors, and match subscripts.
624 .local int pos, lastpos
626 $P0 = getattribute mob, '$.target'
628 $P0 = getattribute mob, '$.pos'
630 lastpos = length target
631 $S0 = substr target, pos, 1
632 if $S0 == '<' goto name
633 $I0 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
634 if $I0 > pos goto numeric
635 $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
636 if $I0 > pos goto scalar
639 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Anchor')
644 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
647 cname = substr target, pos, $I1
648 cname = concat '"', cname
649 cname = concat cname, '"'
655 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
657 cname = substr target, pos, $I1
664 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
665 $I0 = index target, ">", pos
666 if $I0 < pos goto err_close
669 cname = substr target, pos, $I1
671 cname = concat '"', cname
672 cname = concat cname, '"'
679 parse_error(mob, pos, "Missing close '>' in scalar")
684 =item C<parse_subname(STR target, INT pos)>
686 Scan C<target> starting at C<pos> looking for a subrule name
687 (following Perl 6's identifier syntax). Returns any subrule
688 name found, and the ending position of the name.
696 .local int startpos, targetlen
698 targetlen = length target
702 $I1 = find_not_cclass .CCLASS_WORD, target, $I0, targetlen
703 if $I1 == $I0 goto end
705 $S0 = substr target, pos, 2
706 if $S0 != '::' goto end
711 $S0 = substr target, startpos, $I0
716 =item C<parse_subrule(PMC mob)>
718 Parses a subrule token.
726 .local int pos, lastpos
730 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
731 lastpos = length target
733 ## default to non-capturing rule
737 ## see what type of subrule this is
738 if key == '<.' goto scan_subname
739 if key == '<?' goto zerowidth
740 if key == '<!' goto negated
742 ## capturing subrule, get its name/alias
744 .local string subname, cname
745 (subname, pos) = 'parse_subname'(target, pos)
747 $S0 = substr target, pos, 1
748 unless $S0 == '=' goto subrule_arg
749 ## aliased subrule, skip the '=' and get the real name
756 mob['iszerowidth'] = 1
759 (subname, pos) = 'parse_subname'(target, pos)
762 mob['subname'] = subname
763 $S0 = substr target, pos, 1
764 if $S0 == ':' goto subrule_text_arg
765 if $S0 != ' ' goto subrule_end
770 regex = get_global 'regex'
771 $P1 = regex(mob, 'stop'=>'>')
780 pos = find_not_cclass .CCLASS_WHITESPACE, target, $I0, lastpos
781 if pos == $I0 goto end
782 if pos >= lastpos goto end
783 .local string textarg, closedelim
786 $S0 = substr target, pos, 1
787 if $S0 == '"' goto subrule_text_quote
788 if $S0 != "'" goto subrule_text_loop
793 if pos >= lastpos goto end
794 $S0 = substr target, pos, 1
795 if $S0 == closedelim goto subrule_text_end
796 if $S0 != "\\" goto subrule_text_add
798 $S0 = substr target, pos, 1
799 if $S0 == closedelim goto subrule_text_add
800 if $S0 == "\\" goto subrule_text_add
805 goto subrule_text_loop
808 if closedelim == '>' goto subrule_end
810 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
812 $S0 = substr target, pos, 1
813 if $S0 != '>' goto end
816 mob['iscapture'] = iscapture
817 unless iscapture goto end
819 $S0 = concat '"', $S0
820 $S0 = concat $S0, '"'
827 =item C<parse_enumcharclass(PMC mob)>
829 Extract an enumerated character list.
833 .sub 'parse_enumcharclass'
835 .param pmc adverbs :slurpy :named
839 .local int pos, lastpos
841 $P0 = getattribute mob, '$.target'
844 lastpos = length target
847 ## handle the case of <[, <+[, <-[, and <![ as the token
848 ## by converting to <, <+, <-, or <!
849 $S0 = substr op, -1, 1
850 if $S0 != '[' goto parse_loop
855 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
856 if pos >= lastpos goto err_close
857 $S0 = substr target, pos, 1
858 if $S0 != '[' goto subrule
862 .local string charlist
868 ## skip leading whitespace and get next character
869 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
870 if pos >= lastpos goto err_close
871 $S0 = substr target, pos, 1
872 if $S0 == ']' goto enum_close
873 if $S0 == '-' goto err_hyphen
874 if $S0 == '.' goto enum_dotrange
875 if $S0 != "\\" goto enum_addchar
878 ## get escaped character
879 $S0 = substr target, pos, 1
880 ## handle metas such as \n, \t, \r, etc.
881 $I0 = index 'nrtfae0', $S0
882 if $I0 == -1 goto enum_addchar
883 $S0 = substr "\n\r\t\f\a\e\0", $I0, 1
886 if isrange goto enum_addrange
890 ## check if we have a .. range marker
891 if isrange goto enum_addrange
892 $S1 = substr target, pos, 2
893 if $S1 != '..' goto enum_addchar
898 ## add character range to charlist
900 $I2 = ord charlist, -1
904 if $I2 > $I0 goto enum_loop
910 ## create a node for the charlist
911 term = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
913 term.'result_object'(charlist)
918 .local string subname
919 (subname, pos) = 'parse_subname'(target, $I0)
920 if pos == $I0 goto err
921 term = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
924 term['subname'] = subname
925 term['iscapture'] = 0
928 ## find out what operator preceded this term
929 if op == '+' goto combine_plus
930 if op == '-' goto combine_minus
931 if op == '<' goto combine_init
932 if op == '<+' goto combine_init
933 ## token was '<-' or '<!'
934 term['isnegated'] = 1
935 term['iszerowidth'] = 1
936 if op == '<!' goto combine_init
937 ## token is '<-', we need to match a char by concat dot
938 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::CCShortcut')
940 $P0.'result_object'('.')
941 mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
952 ## <a+b> ==> <a> | <b>
953 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt')
961 ## <a-b> ==> <!b> <a>
962 term['isnegated'] = 1
963 term['iszerowidth'] = 1
964 $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
972 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
973 if pos >= lastpos goto err_close
975 op = substr target, pos, 1
977 if op == '+' goto parse_loop
978 if op == '-' goto parse_loop
979 if op != '>' goto err
984 parse_error(mob, pos, "Error parsing enumerated character class")
987 parse_error(mob, pos, "Unescaped '-' in charlist (use '..' or '\\-')")
990 parse_error(mob, pos, "Missing close '>' or ']>' in enumerated character class")
996 =item C<parse_quoted_literal>
998 Parses '...' literals.
1002 .sub 'parse_quoted_literal'
1004 .local int pos, lastpos
1005 .local string target
1006 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
1007 lastpos = length target
1012 if pos > lastpos goto literal_error
1013 $S0 = substr target, pos, 1
1014 if $S0 == "'" goto literal_end
1015 if $S0 != "\\" goto literal_add
1017 $S0 = substr target, pos, 1
1024 mob.'result_object'(lit)
1028 parse_error(mob, pos, "No closing ' in quoted literal")
1033 =item C<parse_modifier>
1039 .sub 'parse_modifier'
1041 .local int pos, lastpos
1042 .local string target, value
1045 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Modifier')
1046 lastpos = length target
1049 pos = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
1050 if pos == $I0 goto name
1052 value = substr target, $I0, $I1
1055 pos = find_not_cclass .CCLASS_WORD, target, pos, lastpos
1057 $S0 = substr target, $I0, $I1
1059 mob.'result_object'(value)
1060 $S0 = substr target, pos, 1
1061 if $S0 != '(' goto end
1063 pos = index target, ')', pos
1065 $S0 = substr target, $I0, $I1
1066 mob.'result_object'($S0)
1069 ### XXX pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1075 .sub 'parse_closure'
1077 .local string target
1079 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Closure')
1082 $S0 = substr target, pos, 1
1083 if $S0 != "{" goto body
1088 $S0 = repeat "}", len
1089 $I0 = index target, $S0, pos
1090 if $I0 < pos goto err_noclose
1092 $S1 = substr target, pos, $I1
1093 mob.'result_object'($S1)
1098 parse_error(mob, pos, "Missing closing braces for closure")
1105 .local string target
1106 .local int pos, keypos
1107 (mob, pos, target) = mob.'new'(mob, 'grammar' => 'PGE::Exp::Action')
1108 keypos = index target, '#= ', pos
1109 if keypos < 0 goto end
1110 $I0 = find_cclass .CCLASS_NEWLINE, target, pos, keypos
1111 if $I0 < keypos goto end
1112 .local string actionkey
1115 actionkey = substr target, keypos, $I0
1116 mob['actionkey'] = actionkey
1126 .param string message
1127 $P0 = getattribute mob, '$.pos'
1129 $P0 = new 'Exception'
1130 $S0 = 'perl6regex parse error: '
1132 $S0 .= ' at offset '
1136 $P1 = getattribute mob, '$.target'
1138 $S1 = substr $S1, pos, 1
1149 .namespace [ 'PGE::Exp' ]
1151 .sub 'perl6exp' :method
1157 .namespace [ 'PGE::Exp::Literal' ]
1159 .sub 'perl6exp' :method
1161 $I0 = pad['ignorecase']
1162 self['ignorecase'] = $I0
1167 .namespace [ 'PGE::Exp::Concat' ]
1169 .sub 'perl6exp' :method
1172 .local pmc array, exp
1174 array = self.'list'()
1179 if i >= n goto iter_end
1182 exp = exp.'perl6exp'(pad)
1183 if null exp goto iter_loop
1197 .namespace [ 'PGE::Exp::Quant' ]
1199 .sub 'perl6exp' :method
1202 $I0 = exists self['backtrack']
1203 if $I0 goto backtrack_done
1204 self['backtrack'] = PGE_BACKTRACK_GREEDY
1205 $I0 = pad['ratchet']
1206 if $I0 == 0 goto backtrack_done
1207 self['backtrack'] = PGE_BACKTRACK_NONE
1212 isarray = pad['isarray']
1216 exp0 = exp0.'perl6exp'(pad)
1218 pad['isarray'] = isarray
1223 .namespace [ 'PGE::Exp::Group' ]
1225 .sub 'perl6exp' :method
1229 $I0 = self['isquant']
1230 if $I0 goto backtrack_done
1231 $I0 = exists self['backtrack']
1232 if $I0 goto backtrack_done
1233 $I0 = pad['ratchet']
1234 if $I0 == 0 goto backtrack_done
1235 self['backtrack'] = PGE_BACKTRACK_NONE
1239 exp0 = exp0.'perl6exp'(pad)
1245 .namespace [ 'PGE::Exp::CGroup' ]
1247 .sub 'perl6exp' :method
1251 $I0 = self['isquant']
1252 if $I0 goto backtrack_done
1253 $I0 = exists self['backtrack']
1254 if $I0 goto backtrack_done
1255 $I0 = pad['ratchet']
1256 if $I0 == 0 goto backtrack_done
1257 self['backtrack'] = PGE_BACKTRACK_NONE
1260 self['iscapture'] = 1
1261 $I0 = exists self['isscope']
1262 if $I0 goto set_cname
1266 $I0 = exists self['cname']
1267 if $I0 goto set_subpats
1268 $I0 = pad['subpats']
1273 cname = self['cname']
1274 $S0 = substr cname, 0, 1
1275 if $S0 == '"' goto set_lexicals
1278 pad['subpats'] = $I0
1284 lexscope = pad['lexscope']
1285 $I0 = exists lexscope[cname]
1286 if $I0 == 0 goto set_lexicals_1
1287 $P0 = lexscope[cname]
1291 lexscope[cname] = self
1294 padarray = pad['isarray']
1296 self['isarray'] = isarray
1297 $I0 = self['isscope']
1298 if $I0 == 0 goto unscoped
1302 subpats = pad['subpats']
1306 pad['lexscope'] = $P0
1308 exp = exp.'perl6exp'(pad)
1310 pad['lexscope'] = lexscope
1311 pad['isarray'] = padarray
1312 pad['subpats'] = subpats
1317 exp = exp.'perl6exp'(pad)
1324 .namespace [ 'PGE::Exp::Subrule' ]
1326 .sub 'perl6exp' :method
1329 $I0 = self['isquant']
1330 if $I0 goto backtrack_done
1331 $I0 = exists self['backtrack']
1332 if $I0 goto backtrack_done
1333 $I0 = pad['ratchet']
1334 if $I0 == 0 goto backtrack_done
1335 self['backtrack'] = PGE_BACKTRACK_NONE
1338 .local int iscapture, isarray
1340 iscapture = self['iscapture']
1341 if iscapture == 0 goto end
1343 cname = self['cname']
1344 isarray = pad['isarray']
1345 lexscope = pad['lexscope']
1346 $I0 = exists lexscope[cname]
1347 if $I0 == 0 goto lexscope_1
1348 $P0 = lexscope[cname]
1352 lexscope[cname] = self
1353 self['isarray'] = isarray
1355 $S0 = substr cname, 0, 1
1356 if $S0 == '"' goto end
1359 pad['subpats'] = $I0
1365 .namespace [ 'PGE::Exp::WS' ]
1367 .sub 'perl6exp' :method
1370 $I0 = pad['sigspace']
1375 self['subname'] = 'ws'
1376 self['iscapture'] = 0
1377 $I0 = pad['ratchet']
1378 unless $I0 goto end_1
1379 self['backtrack'] = PGE_BACKTRACK_NONE
1385 .namespace [ 'PGE::Exp::Alt' ]
1387 .sub 'perl6exp' :method
1390 .local pmc exp0, exp1
1394 ## if we only have one operand (prefix:|),
1395 ## reduce and return it.
1396 $I0 = defined self[1]
1397 if $I0 goto with_rhs
1398 .return exp0.'perl6exp'(pad)
1401 ## if lhs is whitespace, then this is a prefix-alt and
1402 ## we ignore it (by simply returning its rhs)
1403 $I0 = isa exp0, 'PGE::Exp::WS'
1404 if $I0 == 0 goto with_lhs
1405 .return exp1.'perl6exp'(pad)
1408 .local pmc lexscope, savescope, iter
1409 lexscope = pad['lexscope']
1410 savescope = new 'Hash'
1411 iter = new 'Iterator', lexscope
1413 unless iter goto iter_end
1416 savescope[$P1] = $P2
1419 $I0 = pad['subpats']
1420 exp0 = exp0.'perl6exp'(pad)
1423 $I1 = pad['subpats']
1424 pad['subpats'] = $I0
1425 pad['lexscope'] = savescope
1426 exp1 = exp1.'perl6exp'(pad)
1428 $I0 = pad['subpats']
1429 if $I0 >= $I1 goto end
1430 pad['subpats'] = $I1
1436 .namespace [ 'PGE::Exp::Alias' ]
1438 .sub 'perl6exp' :method
1441 .local pmc exp0, exp1
1444 cname = exp0['cname']
1447 ## If we're aliasing a capture group or a quantified capture
1448 ## group, then we just move the alias name to that group.
1449 ## Otherwise, we need to create a capture group for this
1450 ## alias and return that.
1452 $I0 = isa exp1, 'PGE::Exp::CGroup'
1453 if $I0 == 1 goto make_alias
1454 $I0 = isa exp1, 'PGE::Exp::Subrule'
1455 if $I0 == 1 goto make_alias
1456 $I0 = isa exp1, 'PGE::Exp::Quant'
1457 if $I0 == 0 goto add_cgroup
1459 $I0 = isa $P0, 'PGE::Exp::CGroup'
1460 if $I0 == 0 goto add_cgroup
1461 $P0['cname'] = cname
1466 cexp = self.'new'(self, 'grammar'=>'PGE::Exp::CGroup')
1473 cexp['iscapture'] = 1
1474 cexp['cname'] = cname
1475 cexp = cexp.'perl6exp'(pad)
1479 exp1['cname'] = cname
1480 exp1['iscapture'] = 1
1482 exp1 = exp1.'perl6exp'(pad)
1487 .namespace [ 'PGE::Exp::Modifier' ]
1489 .sub 'perl6exp' :method
1495 if key == 'words' goto sigspace
1496 if key == 's' goto sigspace
1497 if key == 'w' goto sigspace
1498 if key == 'i' goto ignorecase
1510 exp = exp.'perl6exp'(pad)
1516 .namespace [ 'PGE::Exp::Conj' ]
1518 .sub 'perl6exp' :method
1521 $P0 = $P0.'perl6exp'(pad)
1524 $P1 = $P1.'perl6exp'(pad)
1530 .namespace [ 'PGE::Exp::Closure' ]
1532 .sub 'perl6exp' :method
1535 .local pmc closure_pp
1536 .local pmc closure_fn
1539 # see if we need to do any pre-processing of the closure
1540 closure_pp = get_hll_global ['PGE::Perl6Regex'], '%closure_pp'
1541 $I0 = defined closure_pp[lang]
1542 if $I0 == 0 goto end
1543 closure_fn = closure_pp[lang]
1545 $S1 = closure_fn($S1)
1546 self.'result_object'($S1)
1557 =item C<PIR_closure(string code)>
1559 This helper function helps with :lang(PIR) closures in rules
1560 by adding a ".sub" wrapper around the code if one isn't
1566 .namespace [ 'PGE::Perl6Regex' ]
1570 $I0 = index code, '.sub'
1571 if $I0 >= 0 goto end
1572 code = concat ".sub anon :anon\n.param pmc match\n", code
1579 .namespace [ 'PGE::Exp::Action' ]
1581 .sub 'perl6exp' :method
1584 self['actionname'] = $S0
1589 .namespace [ 'PGE::Exp::Cut' ]
1591 .sub 'perl6exp' :method
1594 if $S0 == ':::' goto cut_rule
1595 if $S0 == '<commit>' goto cut_match
1596 self['cutmark'] = PGE_CUT_GROUP
1599 self['cutmark'] = PGE_CUT_RULE
1602 self['cutmark'] = PGE_CUT_MATCH
1610 # vim: expandtab shiftwidth=4 ft=pir: