compilers/pge/PGE/Perl6Regex.pir

   1 =head1 TITLE
   2
   3 Perl6Regex - compiler and parser for Perl 6 regex
   4
   5 =over 4
   6
   7 =item C<compile_perl6regex(PMC source, PMC adverbs :slurpy :named)>
   8
   9 Return the result of compiling C<source> according to Perl 6
  10 regex syntax and the associated C<adverbs>.  Normally this
  11 function is obtained using C<compreg 'PGE::Perl6Regex'> instead
  12 of calling it directly.
  13
  14 Returns the compiled regular expression.  If a C<target>
  15 named parameter is supplied, then it will return the parse tree
  16 (target='parse'), the expression tree (target='exp'),
  17 or the resulting PIR code (target='PIR').
  18
  19 =cut
  20
  21 .namespace [ 'PGE::Perl6Regex' ]
  22
  23 .sub 'compile_perl6regex'
  24     .param pmc source
  25     .param pmc args            :slurpy
  26     .param pmc adverbs         :slurpy :named
  27
  28     unless null adverbs goto set_adverbs
  29     adverbs = new 'Hash'
  30
  31   set_adverbs:
  32     $I0 = exists adverbs['grammar']
  33     if $I0 goto with_grammar
  34     unless args goto adverb_grammar_1
  35     $S0 = shift args
  36     adverbs['grammar'] = $S0
  37     goto with_grammar
  38   adverb_grammar_1:
  39     adverbs['grammar'] = 'PGE::Grammar'
  40   with_grammar:
  41     $I0 = exists adverbs['name']
  42     if $I0 goto with_name
  43     unless args goto with_name
  44     $S0 = shift args
  45     adverbs['name'] = $S0
  46   with_name:
  47     $I0 = exists adverbs['lang']
  48     if $I0 goto with_lang
  49     adverbs['lang'] = 'PIR'
  50   with_lang:
  51     $I0 = exists adverbs['ignorecase']
  52     if $I0 goto with_ignorecase
  53     $I0 = adverbs['i']
  54     adverbs['ignorecase'] = $I0
  55   with_ignorecase:
  56     $I0 = exists adverbs['sigspace']
  57     if $I0 goto with_sigspace
  58     $I0 = exists adverbs['s']
  59     if $I0 goto with_s
  60     $I0 = exists adverbs['words']
  61     if $I0 goto with_words
  62     $I0 = adverbs['w']
  63     adverbs['sigspace'] = $I0
  64     goto with_sigspace
  65   with_s:
  66     $I0 = adverbs['s']
  67     adverbs['sigspace'] = $I0
  68     goto with_sigspace
  69   with_words:
  70     $I0 = adverbs['words']
  71     adverbs['sigspace'] = $I0
  72   with_sigspace:
  73
  74     .local string target
  75     target = adverbs['target']
  76     target = downcase target
  77
  78     ##   If we're passed the results of a previous parse,  use it.
  79     .local pmc match, exp
  80     $I0 = isa source, 'PGE::Match'
  81     if $I0 == 0 goto parse
  82     $P0 = source['expr']
  83     if null $P0 goto parse
  84     $I0 = isa $P0, 'PGE::Exp'
  85     if $I0 == 0 goto parse
  86     match = source
  87     goto analyze
  88
  89   parse:
  90     ##   Let's parse the source as a regex
  91     $P0 = get_global 'regex'
  92     match = $P0(source, adverbs :flat :named)
  93     if source == '' goto err_null
  94     if target != 'parse' goto check
  95     .return (match)
  96
  97   check:
  98     unless match goto check_1
  99     $S0 = source
 100     $S1 = match
 101     if $S0 == $S1 goto analyze
 102   check_1:
 103     null $P0
 104     .return ($P0)
 105
 106   analyze:
 107     .local pmc exp, pad
 108     exp = match['expr']
 109     pad = clone adverbs
 110     $P0 = new 'Hash'
 111     pad['lexscope'] = $P0
 112     exp = exp.'perl6exp'(pad)
 113     if null exp goto err_null
 114     .return exp.'compile'(adverbs :flat :named)
 115
 116   err_null:
 117     $I0 = match.'from'()
 118     'parse_error'(match, $I0, 'Null pattern illegal')
 119 .end
 120
 121
 122 =item C<regex(PMC mob, PMC adverbs :slurpy :named)>
 123
 124 Parses a regex according to Perl 6 regex syntax, and returns
 125 the corresponding parse tree.
 126
 127 =cut
 128
 129 .sub 'regex'
 130     .param pmc mob
 131     .param pmc adverbs         :slurpy :named
 132
 133     .local string stop
 134     .local pmc stopstack, optable, match
 135
 136     stopstack = get_global '@!stopstack'
 137     optable = get_global '$optable'
 138
 139     stop = adverbs['stop']
 140     push stopstack, stop
 141     match = optable.'parse'(mob, 'stop'=>stop)
 142     $S0 = pop stopstack
 143
 144     .return (match)
 145 .end
 146
 147 =item C<onload()>
 148
 149 Initializes the Perl6Regex parser and other data structures
 150 needed for compiling regexes.
 151
 152 =cut
 153
 154 .include 'cclass.pasm'
 155
 156 .namespace [ 'PGE::Perl6Regex' ]
 157
 158 .sub '__onload' :load
 159     .local pmc p6meta
 160     p6meta = new 'P6metaclass'
 161     p6meta.'new_class'('PGE::Exp::WS', 'parent'=>'PGE::Exp::Subrule')
 162     p6meta.'new_class'('PGE::Exp::Alias', 'parent'=>'PGE::Exp')
 163
 164     .local pmc optable
 165     optable = new 'PGE::OPTable'
 166     set_global '$optable', optable
 167
 168     $P0 = get_global 'parse_term'
 169     optable.newtok('term:',    'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
 170
 171     $P0 = get_global 'parse_term_ws'
 172     optable.newtok('term:#',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 173
 174     $P0 = get_global 'parse_term_backslash'
 175     optable.newtok("term:\\",  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 176
 177     optable.newtok('term:^',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 178     optable.newtok('term:^^',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 179     optable.newtok('term:$$',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 180     optable.newtok('term:\b',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 181     optable.newtok('term:\B',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 182     optable.newtok('term:<<',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 183     optable.newtok('term:>>',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 184     optable.newtok(unicode:"term:\xab", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 185     optable.newtok(unicode:"term:\xbb", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 186
 187     optable.newtok('term:.',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 188     optable.newtok('term:\d',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 189     optable.newtok('term:\D',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 190     optable.newtok('term:\s',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 191     optable.newtok('term:\S',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 192     optable.newtok('term:\w',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 193     optable.newtok('term:\W',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 194     optable.newtok('term:\N',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 195     optable.newtok('term:\n',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Newline')
 196
 197     $P0 = get_global 'parse_dollar'
 198     optable.newtok('term:$',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 199
 200     $P0 = get_global 'parse_subrule'
 201     optable.newtok('term:<',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 202     optable.newtok('term:<?',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 203     optable.newtok('term:<!',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 204     optable.newtok('term:<.',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 205
 206     $P0 = get_global 'parse_enumcharclass'
 207     optable.newtok('term:<[',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 208     optable.newtok('term:<+', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 209     optable.newtok('term:<-', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 210     optable.newtok('term:<![', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 211
 212     $P0 = get_global 'parse_quoted_literal'
 213     optable.newtok("term:'",  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 214
 215     optable.newtok('term:::',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 216     optable.newtok('term::::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 217     optable.newtok('term:<cut>',    'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 218     optable.newtok('term:<commit>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 219
 220     $P0 = get_global 'parse_closure'
 221     optable.newtok("term:{{",       'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 222
 223     $P0 = get_global 'parse_action'
 224     optable.newtok("term:{*}",      'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 225
 226
 227     optable.newtok('circumfix:[ ]', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Group')
 228     optable.newtok('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CGroup')
 229
 230     $P0 = get_global 'parse_quant'
 231     optable.newtok('postfix:*',  'looser'=>'term:', 'parsed'=>$P0)
 232     optable.newtok('postfix:+',  'equiv'=>'postfix:*', 'parsed'=>$P0)
 233     optable.newtok('postfix:?',  'equiv'=>'postfix:*', 'parsed'=>$P0)
 234     optable.newtok('postfix::',  'equiv'=>'postfix:*', 'parsed'=>$P0)
 235     optable.newtok('postfix:**', 'equiv'=>'postfix:*', 'parsed'=>$P0)
 236     $P0 = get_global 'parse_quant_error'
 237     optable.newtok('term:*', 'equiv'=>'term:', 'parsed'=>$P0)
 238     optable.newtok('term:+', 'equiv'=>'term:', 'parsed'=>$P0)
 239     optable.newtok('term:?', 'equiv'=>'term:', 'parsed'=>$P0)
 240
 241     optable.newtok('infix:',   'looser'=>'postfix:*', 'assoc'=>'list', 'nows'=>1, 'match'=>'PGE::Exp::Concat')
 242     optable.newtok('infix:&',  'looser'=>'infix:', 'nows'=>1, 'match'=>'PGE::Exp::Conj')
 243     optable.newtok('infix:|',  'looser'=>'infix:&', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 244     optable.newtok('prefix:|', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 245     optable.newtok('infix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 246     optable.newtok('prefix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 247
 248     optable.newtok('infix::=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
 249     optable.newtok('infix:=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
 250
 251     $P0 = get_global 'parse_modifier'
 252     optable.newtok('prefix::', 'looser'=>'infix:|', 'nows'=>1, 'parsed'=>$P0)
 253
 254     optable.newtok('close:}',  'precedence'=>'<', 'nows'=>1)
 255
 256     .local pmc esclist
 257     esclist = new 'Hash'
 258     set_global '%esclist', esclist
 259     esclist['e'] = "\e"
 260     esclist['f'] = "\f"
 261     esclist['r'] = "\r"
 262     esclist['t'] = "\t"
 263     esclist['v'] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
 264     esclist['h'] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
 265     esclist['n'] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029"
 266     # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
 267
 268     # Create and store closure preprocessors in %closure_pp
 269     $P0 = new 'Hash'
 270     set_hll_global ['PGE::Perl6Regex'], '%closure_pp', $P0
 271     $P1 = get_hll_global ['PGE::Perl6Regex'], 'PIR_closure'
 272     $P0["PIR"] = $P1
 273
 274     # Create an array for holding stop tokens
 275     $P0 = new 'ResizablePMCArray'
 276     set_hll_global ['PGE::Perl6Regex'], '@!stopstack', $P0
 277
 278     $P0 = get_global 'compile_perl6regex'
 279     compreg 'PGE::Perl6Regex', $P0
 280     .return ()
 281 .end
 282
 283
 284 =item C<parse_term(PMC mob [, PMC adverbs :slurpy :named])>
 285
 286 Parses literal strings and whitespace.
 287 Return a failed match if the stoptoken is found.
 288
 289 =cut
 290
 291 .sub 'parse_term'
 292     .param pmc mob
 293     .param pmc adverbs         :slurpy :named
 294
 295     .local string target
 296     .local int pos, lastpos
 297     $P0 = getattribute mob, '$.target'
 298     target = $P0
 299     $P0 = getattribute mob, '$.pos'
 300     pos = $P0
 301     lastpos = length target
 302
 303     .local string stop
 304     $P0 = get_hll_global ['PGE::Perl6Regex'], '@!stopstack'
 305     stop = $P0[-1]
 306
 307     $I0 = is_cclass .CCLASS_WHITESPACE, target, pos
 308     if $I0 goto term_ws
 309     $I0 = length stop
 310     if $I0 == 0 goto not_stop
 311     $S0 = substr target, pos, $I0
 312     if $S0 == stop goto end_noterm
 313   not_stop:
 314     ##   find length of word character sequence
 315     .local int litlen
 316     $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
 317     litlen = $I0 - pos
 318
 319     ##   if we didn't find any, return no term
 320     if litlen == 0 goto end_noterm
 321
 322     ##   for multi-char unquoted literals, leave the last character
 323     ##   in case it's quantified (it gets processed as a subsequent term)
 324     if litlen < 2 goto term_literal
 325     dec litlen
 326   term_literal:
 327     $S0 = substr target, pos, litlen
 328     pos += litlen
 329     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 330     mob.'result_object'($S0)
 331     mob.'to'(pos)
 332     .return (mob)
 333
 334   term_ws:
 335     .return 'parse_term_ws'(mob)
 336
 337   end_noterm:
 338     (mob) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 339     .return (mob)
 340 .end
 341
 342
 343 =item C<parse_term_backslash(mob [, adverbs :slurpy :named])>
 344
 345 Parses terms beginning with backslash.
 346
 347 =cut
 348
 349 .sub 'parse_term_backslash'
 350     .param pmc mob
 351     .param pmc adverbs         :slurpy :named
 352
 353     .local string target
 354     .local int pos, lastpos
 355     $P0 = getattribute mob, '$.target'
 356     target = $P0
 357     $P0 = getattribute mob, '$.pos'
 358     pos = $P0
 359     lastpos = length target
 360
 361     .local string initchar
 362     initchar = substr target, pos, 1
 363     $I0 = is_cclass .CCLASS_WORD, initchar, 0
 364     if $I0 goto term_metachar
 365   quoted_metachar:
 366     inc pos
 367     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 368     mob.'result_object'(initchar)
 369     mob.'to'(pos)
 370     .return (mob)
 371
 372   term_metachar:
 373     .local int isnegated
 374     isnegated = is_cclass .CCLASS_UPPERCASE, initchar, 0
 375     ## $S0 = downcase     FIXME: RT# 48108
 376             $I0 = ord initchar
 377             $S0 = chr $I0
 378             $S0 = downcase $S0
 379     if $S0 == 'x' goto scan_xdo
 380     if $S0 == 'o' goto scan_xdo
 381     $P0 = get_global '%esclist'
 382     $I0 = exists $P0[$S0]
 383     if $I0 == 0 goto err_reserved_metachar
 384     inc pos
 385     .local string charlist
 386     charlist = $P0[$S0]
 387     if isnegated goto term_charlist
 388     $I0 = length charlist
 389     if $I0 > 1 goto term_charlist
 390
 391   term_literal:
 392     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 393     mob.'result_object'(charlist)
 394     mob.'to'(pos)
 395     .return (mob)
 396
 397   term_charlist:
 398     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
 399     mob.'result_object'(charlist)
 400     mob['isnegated'] = isnegated
 401     mob.'to'(pos)
 402     .return (mob)
 403
 404   scan_xdo:
 405     inc pos
 406     .local int base, decnum, isbracketed
 407     charlist = ''
 408     base = index '        o d     x', $S0
 409     decnum = 0
 410     $S0 = substr target, pos, 1
 411     isbracketed = iseq $S0, '['
 412     pos += isbracketed
 413   scan_xdo_char_loop:
 414     $S0 = substr target, pos, 1
 415     $I0 = index '0123456789abcdef', $S0
 416     if $I0 < 0 goto scan_xdo_char_end
 417     if $I0 >= base goto scan_xdo_char_end
 418     decnum *= base
 419     decnum += $I0
 420     inc pos
 421     goto scan_xdo_char_loop
 422   scan_xdo_char_end:
 423     $S1 = chr decnum
 424     concat charlist, $S1
 425     unless isbracketed goto scan_xdo_end
 426     if $S0 == ']' goto scan_xdo_end
 427     if $S0 == '' goto err_missing_bracket
 428     if $S0 != ',' goto err_bracketed
 429     if isnegated goto err_negated_brackets
 430     inc pos
 431     decnum = 0
 432     goto scan_xdo_char_loop
 433   scan_xdo_end:
 434     pos += isbracketed
 435     if isnegated goto term_charlist
 436     goto term_literal
 437
 438   err_reserved_metachar:
 439     parse_error(mob, pos, 'Alphanumeric metacharacters are reserved')
 440   err_missing_bracket:
 441     parse_error(mob, pos, 'Missing close bracket for \\x[...] or \\o[...]')
 442   err_bracketed:
 443     parse_error(mob, pos, 'Invalid digit in \\x[...] or \\o[...]')
 444   err_negated_brackets:
 445     parse_error(mob, pos, 'Cannot use comma in \\X[...] or \\O[...]')
 446 .end
 447
 448
 449 =item C<parse_term_ws(PMC mob)>
 450
 451 Parses a whitespace term.
 452
 453 =cut
 454
 455 .sub 'parse_term_ws'
 456     .param pmc mob
 457     .local string target
 458     .local int pos, lastpos
 459     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
 460     lastpos = length target
 461
 462   term_ws_loop:
 463     ##   scan for the next non-whitespace character
 464     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 465     $S0 = substr target, pos, 1
 466     if $S0 != '#' goto end
 467     ##   we have a #-comment, determine its closing delimiter
 468     inc pos
 469     .local string closedelim
 470     closedelim = "\n"
 471     $S0 = substr target, pos, 1
 472     $I0 = index '<[{(', $S0
 473     if $I0 < 0 goto term_ws_loop_1
 474     closedelim = substr '>]})', $I0, 1
 475   term_ws_loop_1:
 476     $I0 = index target, closedelim, pos
 477     pos = $I0 + 1
 478     if pos > 0 goto term_ws_loop
 479     pos = lastpos
 480   end:
 481     mob.'to'(pos)
 482     .return (mob)
 483 .end
 484
 485
 486 =item C<parse_quant(PMC mob)>
 487
 488 Parses a quantifier, such as *, +, ?, :, and all of their wonderous
 489 combinations.
 490
 491 =cut
 492
 493 .sub 'parse_quant'
 494     .param pmc mob
 495     .local string target
 496     .local pmc key
 497     .local int pos, lastpos
 498     key = mob['KEY']
 499     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant')
 500     lastpos = length target
 501
 502     .local int min, max, suffixpos
 503     .local string suffix
 504     min = 1
 505     max = 1
 506     suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 507
 508     if key == '**' goto quant_suffix
 509     if key == ':' goto quant_cut
 510     if key == '+' goto quant_max
 511     ##  quantifier is '?' or '*'
 512     min = 0
 513   quant_max:
 514     if key == '?' goto quant_suffix
 515     ##  quantifier is '+' or '*'
 516     max = PGE_INF
 517     goto quant_suffix
 518
 519   quant_cut:
 520     #   The postfix:<:> operator may bring us here when it's really a
 521     #   term:<::> term.  So, we check for that here and fail this match
 522     #   if we really have a cut term.
 523     if key != ':' goto quant_suffix
 524     $S0 = substr target, pos, 1
 525     if $S0 == ':' goto end
 526     mob['backtrack'] = PGE_BACKTRACK_NONE
 527
 528   quant_suffix:
 529     suffix = substr target, suffixpos, 2
 530     if suffix == ':?' goto quant_eager
 531     if suffix == ':!' goto quant_greedy
 532   quant_suffix_1:
 533     suffix = substr target, suffixpos, 1
 534     if suffix == '?' goto quant_eager
 535     if suffix == '!' goto quant_greedy
 536     if suffix != ':' goto quant
 537   quant_none:
 538     mob['backtrack'] = PGE_BACKTRACK_NONE
 539     goto quant_skip_suffix
 540   quant_eager:
 541     mob['backtrack'] = PGE_BACKTRACK_EAGER
 542     goto quant_skip_suffix
 543   quant_greedy:
 544     mob['backtrack'] = PGE_BACKTRACK_GREEDY
 545   quant_skip_suffix:
 546     $I0 = length suffix
 547     pos = suffixpos + $I0
 548
 549   quant:
 550     if key != '**' goto quant_set
 551   quant_closure:
 552     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 553     .local int isconst
 554     isconst = is_cclass .CCLASS_NUMERIC, target, pos
 555     if isconst goto brace_skip
 556     $S0 = substr target, pos, 1
 557     if $S0 != "{" goto err_closure
 558     inc pos
 559   brace_skip:
 560     $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
 561     if $I1 <= pos goto err_closure
 562     $S0 = substr target, pos
 563     min = $S0
 564     max = $S0
 565     pos = $I1
 566     $S0 = substr target, pos, 2
 567     if $S0 != '..' goto quant_closure_end
 568     pos += 2
 569     max = PGE_INF
 570     $S0 = substr target, pos, 1
 571     if $S0 != '*' goto quant_range_end
 572     inc pos
 573     goto quant_closure_end
 574   quant_range_end:
 575     $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
 576     if $I1 <= pos goto err_closure
 577     $S0 = substr target, pos
 578     max = $S0
 579     pos = $I1
 580   quant_closure_end:
 581     if isconst goto brace_skip2
 582     $S0 = substr target, pos, 1
 583     if $S0 != "}" goto err_closure
 584     inc pos
 585   brace_skip2:
 586     suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 587
 588   quant_set:
 589     mob['min'] = min
 590     mob['max'] = max
 591     mob.'to'(pos)
 592   end:
 593     .return (mob)
 594
 595   err_closure:
 596     parse_error(mob, pos, "Error in closure quantifier")
 597 .end
 598
 599
 600 =item C<parse_quant_error(mob)>
 601
 602 Throw an exception for quantifiers in term position.
 603
 604 =cut
 605
 606 .sub 'parse_quant_error'
 607     .param pmc mob
 608     .local int pos
 609     pos = mob.'to'()
 610     parse_error(mob, pos, "Quantifier follows nothing in regex")
 611 .end
 612
 613
 614 =item C<parse_dollar(PMC mob)>
 615
 616 Parse things that begin with a dollar sign, such as scalars,
 617 anchors, and match subscripts.
 618
 619 =cut
 620
 621 .sub "parse_dollar"
 622     .param pmc mob
 623     .local string target
 624     .local int pos, lastpos
 625     .local string cname
 626     $P0 = getattribute mob, '$.target'
 627     target = $P0
 628     $P0 = getattribute mob, '$.pos'
 629     pos = $P0
 630     lastpos = length target
 631     $S0 = substr target, pos, 1
 632     if $S0 == '<' goto name
 633     $I0 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
 634     if $I0 > pos goto numeric
 635     $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
 636     if $I0 > pos goto scalar
 637
 638   eos_anchor:
 639     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Anchor')
 640     mob.'to'(pos)
 641     .return (mob)
 642
 643   scalar:
 644     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
 645     dec pos
 646     $I1 = $I0 - pos
 647     cname = substr target, pos, $I1
 648     cname = concat '"', cname
 649     cname = concat cname, '"'
 650     mob["cname"] = cname
 651     mob.'to'($I0)
 652     .return (mob)
 653
 654   numeric:
 655     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
 656     $I1 = $I0 - pos
 657     cname = substr target, pos, $I1
 658     mob["cname"] = cname
 659     mob.'to'($I0)
 660     .return (mob)
 661
 662   name:
 663     inc pos
 664     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
 665     $I0 = index target, ">", pos
 666     if $I0 < pos goto err_close
 667   name_1:
 668     $I1 = $I0 - pos
 669     cname = substr target, pos, $I1
 670     cname = escape cname
 671     cname = concat '"', cname
 672     cname = concat cname, '"'
 673     mob["cname"] = cname
 674     pos = $I0 + 1
 675     mob.'to'(pos)
 676     .return (mob)
 677
 678   err_close:
 679     parse_error(mob, pos, "Missing close '>' in scalar")
 680     .return (mob)
 681 .end
 682
 683
 684 =item C<parse_subname(STR target, INT pos)>
 685
 686 Scan C<target> starting at C<pos> looking for a subrule name
 687 (following Perl 6's identifier syntax).  Returns any subrule
 688 name found, and the ending position of the name.
 689
 690 =cut
 691
 692
 693 .sub 'parse_subname'
 694     .param string target
 695     .param int pos
 696     .local int startpos, targetlen
 697
 698     targetlen = length target
 699     startpos = pos
 700     $I0 = pos
 701   loop:
 702     $I1 = find_not_cclass .CCLASS_WORD, target, $I0, targetlen
 703     if $I1 == $I0 goto end
 704     pos = $I1
 705     $S0 = substr target, pos, 2
 706     if $S0 != '::' goto end
 707     $I0 = pos + 2
 708     goto loop
 709   end:
 710     $I0 = pos - startpos
 711     $S0 = substr target, startpos, $I0
 712     .return ($S0, pos)
 713 .end
 714
 715
 716 =item C<parse_subrule(PMC mob)>
 717
 718 Parses a subrule token.
 719
 720 =cut
 721
 722 .sub 'parse_subrule'
 723     .param pmc mob
 724     .local string target
 725     .local pmc mobsave
 726     .local int pos, lastpos
 727     .local string key
 728     key = mob['KEY']
 729     mobsave = mob
 730     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
 731     lastpos = length target
 732
 733     ##  default to non-capturing rule
 734     .local int iscapture
 735     iscapture = 0
 736
 737     ##  see what type of subrule this is
 738     if key == '<.' goto scan_subname
 739     if key == '<?' goto zerowidth
 740     if key == '<!' goto negated
 741
 742     ##  capturing subrule, get its name/alias
 743     iscapture = 1
 744     .local string subname, cname
 745     (subname, pos) = 'parse_subname'(target, pos)
 746     cname = subname
 747     $S0 = substr target, pos, 1
 748     unless $S0 == '=' goto subrule_arg
 749     ##  aliased subrule, skip the '=' and get the real name
 750     inc pos
 751     goto scan_subname
 752
 753   negated:
 754     mob['isnegated'] = 1
 755   zerowidth:
 756     mob['iszerowidth'] = 1
 757
 758   scan_subname:
 759     (subname, pos) = 'parse_subname'(target, pos)
 760
 761   subrule_arg:
 762     mob['subname'] = subname
 763     $S0 = substr target, pos, 1
 764     if $S0 == ':' goto subrule_text_arg
 765     if $S0 != ' ' goto subrule_end
 766   subrule_pattern_arg:
 767     inc pos
 768     mob.'to'(pos)
 769     .local pmc regex
 770     regex = get_global 'regex'
 771     $P1 = regex(mob, 'stop'=>'>')
 772     unless $P1 goto end
 773     $S0 = $P1
 774     mob['arg'] = $S0
 775     pos = $P1.'to'()
 776     mob.'to'(-1)
 777     goto subrule_end
 778   subrule_text_arg:
 779     $I0 = pos + 1
 780     pos = find_not_cclass .CCLASS_WHITESPACE, target, $I0, lastpos
 781     if pos == $I0 goto end
 782     if pos >= lastpos goto end
 783     .local string textarg, closedelim
 784     textarg = ''
 785     closedelim = '>'
 786     $S0 = substr target, pos, 1
 787     if $S0 == '"' goto subrule_text_quote
 788     if $S0 != "'" goto subrule_text_loop
 789   subrule_text_quote:
 790     closedelim = $S0
 791     inc pos
 792   subrule_text_loop:
 793     if pos >= lastpos goto end
 794     $S0 = substr target, pos, 1
 795     if $S0 == closedelim goto subrule_text_end
 796     if $S0 != "\\" goto subrule_text_add
 797     inc pos
 798     $S0 = substr target, pos, 1
 799     if $S0 == closedelim goto subrule_text_add
 800     if $S0 == "\\" goto subrule_text_add
 801     textarg .= "\\"
 802   subrule_text_add:
 803     textarg .= $S0
 804     inc pos
 805     goto subrule_text_loop
 806   subrule_text_end:
 807     mob['arg'] = textarg
 808     if closedelim == '>' goto subrule_end
 809     inc pos
 810     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 811   subrule_end:
 812     $S0 = substr target, pos, 1
 813     if $S0 != '>' goto end
 814     inc pos
 815     mob.'to'(pos)
 816     mob['iscapture'] = iscapture
 817     unless iscapture goto end
 818     $S0 = escape cname
 819     $S0 = concat '"', $S0
 820     $S0 = concat $S0, '"'
 821     mob['cname'] = $S0
 822   end:
 823     .return (mob)
 824 .end
 825
 826
 827 =item C<parse_enumcharclass(PMC mob)>
 828
 829 Extract an enumerated character list.
 830
 831 =cut
 832
 833 .sub 'parse_enumcharclass'
 834     .param pmc mob
 835     .param pmc adverbs         :slurpy :named
 836     .local string target
 837     .local pmc term
 838     .local string op
 839     .local int pos, lastpos
 840
 841     $P0 = getattribute mob, '$.target'
 842     target = $P0
 843     pos = mob.to()
 844     lastpos = length target
 845     op = mob['KEY']
 846
 847     ##   handle the case of <[, <+[, <-[, and <![ as the token
 848     ##   by converting to <, <+, <-, or <!
 849     $S0 = substr op, -1, 1
 850     if $S0 != '[' goto parse_loop
 851     chopn op, 1
 852     goto enum
 853
 854   parse_loop:
 855     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 856     if pos >= lastpos goto err_close
 857     $S0 = substr target, pos, 1
 858     if $S0 != '[' goto subrule
 859     inc pos
 860
 861   enum:
 862     .local string charlist
 863     .local int isrange
 864     charlist = ''
 865     isrange = 0
 866
 867   enum_loop:
 868     ##   skip leading whitespace and get next character
 869     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 870     if pos >= lastpos goto err_close
 871     $S0 = substr target, pos, 1
 872     if $S0 == ']' goto enum_close
 873     if $S0 == '-' goto err_hyphen
 874     if $S0 == '.' goto enum_dotrange
 875     if $S0 != "\\" goto enum_addchar
 876   enum_backslash:
 877     inc pos
 878     ##   get escaped character
 879     $S0 = substr target, pos, 1
 880     ##   handle metas such as \n, \t, \r, etc.
 881     $I0 = index 'nrtfae0', $S0
 882     if $I0 == -1 goto enum_addchar
 883     $S0 = substr "\n\r\t\f\a\e\0", $I0, 1
 884   enum_addchar:
 885     inc pos
 886     if isrange goto enum_addrange
 887     charlist .= $S0
 888     goto enum_loop
 889   enum_dotrange:
 890     ##   check if we have a .. range marker
 891     if isrange goto enum_addrange
 892     $S1 = substr target, pos, 2
 893     if $S1 != '..' goto enum_addchar
 894     pos += 2
 895     isrange = 1
 896     goto enum_loop
 897   enum_addrange:
 898     ##   add character range to charlist
 899     isrange = 0
 900     $I2 = ord charlist, -1
 901     $I0 = ord $S0
 902   enum_addrange_1:
 903     inc $I2
 904     if $I2 > $I0 goto enum_loop
 905     $S1 = chr $I2
 906     charlist .= $S1
 907     goto enum_addrange_1
 908   enum_close:
 909     inc pos
 910     ##   create a node for the charlist
 911     term = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
 912     term.'to'(pos)
 913     term.'result_object'(charlist)
 914     goto combine
 915
 916   subrule:
 917     $I0 = pos
 918     .local string subname
 919     (subname, pos) = 'parse_subname'(target, $I0)
 920     if pos == $I0 goto err
 921     term = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
 922     term.'from'($I0)
 923     term.'to'(pos)
 924     term['subname'] = subname
 925     term['iscapture'] = 0
 926
 927   combine:
 928     ##   find out what operator preceded this term
 929     if op == '+' goto combine_plus
 930     if op == '-' goto combine_minus
 931     if op == '<' goto combine_init
 932     if op == '<+' goto combine_init
 933     ##   token was '<-' or '<!'
 934     term['isnegated'] = 1
 935     term['iszerowidth'] = 1
 936     if op == '<!' goto combine_init
 937     ##   token is '<-', we need to match a char by concat dot
 938     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::CCShortcut')
 939     $P0.'to'(pos)
 940     $P0.'result_object'('.')
 941     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
 942     mob.'to'(pos)
 943     mob[0] = term
 944     mob[1] = $P0
 945     goto next_op
 946
 947   combine_init:
 948     mob = term
 949     goto next_op
 950
 951   combine_plus:
 952     ##   <a+b>  ==>   <a> | <b>
 953     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt')
 954     $P0.'to'(pos)
 955     $P0[0] = mob
 956     $P0[1] = term
 957     mob = $P0
 958     goto next_op
 959
 960   combine_minus:
 961     ##   <a-b> ==>   <!b> <a>
 962     term['isnegated'] = 1
 963     term['iszerowidth'] = 1
 964     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
 965     $P0.'to'(pos)
 966     $P0[0] = term
 967     $P0[1] = mob
 968     mob = $P0
 969     goto next_op
 970
 971   next_op:
 972     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 973     if pos >= lastpos goto err_close
 974
 975     op = substr target, pos, 1
 976     inc pos
 977     if op == '+' goto parse_loop
 978     if op == '-' goto parse_loop
 979     if op != '>' goto err
 980     mob.'to'(pos)
 981     goto end
 982
 983   err:
 984     parse_error(mob, pos, "Error parsing enumerated character class")
 985     goto end
 986   err_hyphen:
 987     parse_error(mob, pos, "Unescaped '-' in charlist (use '..' or '\\-')")
 988     goto end
 989   err_close:
 990     parse_error(mob, pos, "Missing close '>' or ']>' in enumerated character class")
 991   end:
 992     .return (mob)
 993 .end
 994
 995
 996 =item C<parse_quoted_literal>
 997
 998 Parses '...' literals.
 999
1000 =cut
1001
1002 .sub 'parse_quoted_literal'
1003     .param pmc mob
1004     .local int pos, lastpos
1005     .local string target
1006     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
1007     lastpos = length target
1008     lastpos -= 1
1009     .local string lit
1010     lit = ''
1011   literal_iter:
1012     if pos > lastpos goto literal_error
1013     $S0 = substr target, pos, 1
1014     if $S0 == "'" goto literal_end
1015     if $S0 != "\\" goto literal_add
1016     inc pos
1017     $S0 = substr target, pos, 1
1018   literal_add:
1019     inc pos
1020     lit .= $S0
1021     goto literal_iter
1022   literal_end:
1023     inc pos
1024     mob.'result_object'(lit)
1025     mob.'to'(pos)
1026     .return (mob)
1027   literal_error:
1028     parse_error(mob, pos, "No closing ' in quoted literal")
1029     .return (mob)
1030 .end
1031
1032
1033 =item C<parse_modifier>
1034
1035 Parse a modifier.
1036
1037 =cut
1038
1039 .sub 'parse_modifier'
1040     .param pmc mob
1041     .local int pos, lastpos
1042     .local string target, value
1043     .local string key
1044     key = mob['KEY']
1045     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Modifier')
1046     lastpos = length target
1047     value = "1"
1048     $I0 = pos
1049     pos = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
1050     if pos == $I0 goto name
1051     $I1 = pos - $I0
1052     value = substr target, $I0, $I1
1053     $I0 = pos
1054   name:
1055     pos = find_not_cclass .CCLASS_WORD, target, pos, lastpos
1056     $I1 = pos - $I0
1057     $S0 = substr target, $I0, $I1
1058     mob['key'] = $S0
1059     mob.'result_object'(value)
1060     $S0 = substr target, pos, 1
1061     if $S0 != '(' goto end
1062     $I0 = pos + 1
1063     pos = index target, ')', pos
1064     $I1 = pos - $I0
1065     $S0 = substr target, $I0, $I1
1066     mob.'result_object'($S0)
1067     inc pos
1068   end:
1069     ### XXX pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1070     mob.'to'(pos)
1071     .return (mob)
1072 .end
1073
1074
1075 .sub 'parse_closure'
1076     .param pmc mob
1077     .local string target
1078     .local int pos, len
1079     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Closure')
1080     len = 2
1081   init:
1082     $S0 = substr target, pos, 1
1083     if $S0 != "{" goto body
1084     inc len
1085     inc pos
1086     goto init
1087   body:
1088     $S0 = repeat "}", len
1089     $I0 = index target, $S0, pos
1090     if $I0 < pos goto err_noclose
1091     $I1 = $I0 - pos
1092     $S1 = substr target, pos, $I1
1093     mob.'result_object'($S1)
1094     pos = $I0 + len
1095     mob.'to'(pos)
1096     .return (mob)
1097  err_noclose:
1098     parse_error(mob, pos, "Missing closing braces for closure")
1099     .return (mob)
1100 .end
1101
1102
1103 .sub 'parse_action'
1104     .param pmc mob
1105     .local string target
1106     .local int pos, keypos
1107     (mob, pos, target) = mob.'new'(mob, 'grammar' => 'PGE::Exp::Action')
1108     keypos = index target, '#= ', pos
1109     if keypos < 0 goto end
1110     $I0 = find_cclass .CCLASS_NEWLINE, target, pos, keypos
1111     if $I0 < keypos goto end
1112     .local string actionkey
1113     keypos += 3
1114     $I0 -= keypos
1115     actionkey = substr target, keypos, $I0
1116     mob['actionkey'] = actionkey
1117   end:
1118     mob.'to'(pos)
1119     .return (mob)
1120 .end
1121
1122
1123 .sub 'parse_error'
1124     .param pmc mob
1125     .param int pos
1126     .param string message
1127     $P0 = getattribute mob, '$.pos'
1128     $P0 = pos
1129     $P0 = new 'Exception'
1130     $S0 = 'perl6regex parse error: '
1131     $S0 .= message
1132     $S0 .= ' at offset '
1133     $S1 = pos
1134     $S0 .= $S1
1135     $S0 .= ", found '"
1136     $P1 = getattribute mob, '$.target'
1137     $S1 = $P1
1138     $S1 = substr $S1, pos, 1
1139     $S0 .= $S1
1140     $S0 .= "'"
1141     $P0 = $S0
1142     throw $P0
1143     .return ()
1144 .end
1145
1146
1147
1148
1149 .namespace [ 'PGE::Exp' ]
1150
1151 .sub 'perl6exp' :method
1152     .param pmc pad
1153     .return (self)
1154 .end
1155
1156
1157 .namespace [ 'PGE::Exp::Literal' ]
1158
1159 .sub 'perl6exp' :method
1160     .param pmc pad
1161     $I0 = pad['ignorecase']
1162     self['ignorecase'] = $I0
1163     .return (self)
1164 .end
1165
1166
1167 .namespace [ 'PGE::Exp::Concat' ]
1168
1169 .sub 'perl6exp' :method
1170     .param pmc pad
1171
1172     .local pmc array, exp
1173     .local int i, j, n
1174     array = self.'list'()
1175     n = elements array
1176     i = 0
1177     j = 0
1178   iter_loop:
1179     if i >= n goto iter_end
1180     exp = self[i]
1181     inc i
1182     exp = exp.'perl6exp'(pad)
1183     if null exp goto iter_loop
1184     self[j] = exp
1185     inc j
1186     goto iter_loop
1187   iter_end:
1188     array = j
1189     if j > 1 goto end
1190     $P0 = array[0]
1191     .return ($P0)
1192   end:
1193     .return (self)
1194 .end
1195
1196
1197 .namespace [ 'PGE::Exp::Quant' ]
1198
1199 .sub 'perl6exp' :method
1200     .param pmc pad
1201
1202     $I0 = exists self['backtrack']
1203     if $I0 goto backtrack_done
1204     self['backtrack'] = PGE_BACKTRACK_GREEDY
1205     $I0 = pad['ratchet']
1206     if $I0 == 0 goto backtrack_done
1207     self['backtrack'] = PGE_BACKTRACK_NONE
1208   backtrack_done:
1209
1210     .local pmc exp0
1211     .local int isarray
1212     isarray = pad['isarray']
1213     pad['isarray'] = 1
1214     exp0 = self[0]
1215     exp0['isquant'] = 1
1216     exp0 = exp0.'perl6exp'(pad)
1217     self[0] = exp0
1218     pad['isarray'] = isarray
1219     .return (self)
1220 .end
1221
1222
1223 .namespace [ 'PGE::Exp::Group' ]
1224
1225 .sub 'perl6exp' :method
1226     .param pmc pad
1227     .local pmc exp0
1228
1229     $I0 = self['isquant']
1230     if $I0 goto backtrack_done
1231     $I0 = exists self['backtrack']
1232     if $I0 goto backtrack_done
1233     $I0 = pad['ratchet']
1234     if $I0 == 0 goto backtrack_done
1235     self['backtrack'] = PGE_BACKTRACK_NONE
1236   backtrack_done:
1237
1238     exp0 = self[0]
1239     exp0 = exp0.'perl6exp'(pad)
1240     self[0] = exp0
1241     .return (self)
1242 .end
1243
1244
1245 .namespace [ 'PGE::Exp::CGroup' ]
1246
1247 .sub 'perl6exp' :method
1248     .param pmc pad
1249     .local pmc exp
1250
1251     $I0 = self['isquant']
1252     if $I0 goto backtrack_done
1253     $I0 = exists self['backtrack']
1254     if $I0 goto backtrack_done
1255     $I0 = pad['ratchet']
1256     if $I0 == 0 goto backtrack_done
1257     self['backtrack'] = PGE_BACKTRACK_NONE
1258   backtrack_done:
1259
1260     self['iscapture'] = 1
1261     $I0 = exists self['isscope']
1262     if $I0 goto set_cname
1263     self['isscope'] = 1
1264
1265   set_cname:
1266     $I0 = exists self['cname']
1267     if $I0 goto set_subpats
1268     $I0 = pad['subpats']
1269     self['cname'] = $I0
1270
1271   set_subpats:
1272     .local string cname
1273     cname = self['cname']
1274     $S0 = substr cname, 0, 1
1275     if $S0 == '"' goto set_lexicals
1276     $I0 = cname
1277     inc $I0
1278     pad['subpats'] = $I0
1279
1280   set_lexicals:
1281     .local int isarray
1282     isarray = 0
1283     .local pmc lexscope
1284     lexscope = pad['lexscope']
1285     $I0 = exists lexscope[cname]
1286     if $I0 == 0 goto set_lexicals_1
1287     $P0 = lexscope[cname]
1288     $P0['isarray'] = 1
1289     isarray = 1
1290   set_lexicals_1:
1291     lexscope[cname] = self
1292
1293     .local int padarray
1294     padarray = pad['isarray']
1295     isarray |= padarray
1296     self['isarray'] = isarray
1297     $I0 = self['isscope']
1298     if $I0 == 0 goto unscoped
1299
1300   scoped:
1301     .local int subpats
1302     subpats = pad['subpats']
1303     pad['subpats'] = 0
1304     pad['isarray'] = 0
1305     $P0 = new 'Hash'
1306     pad['lexscope'] = $P0
1307     exp = self[0]
1308     exp = exp.'perl6exp'(pad)
1309     self[0] = exp
1310     pad['lexscope'] = lexscope
1311     pad['isarray'] = padarray
1312     pad['subpats'] = subpats
1313     goto end
1314
1315   unscoped:
1316     exp = self[0]
1317     exp = exp.'perl6exp'(pad)
1318     self[0] = exp
1319   end:
1320     .return (self)
1321 .end
1322
1323
1324 .namespace [ 'PGE::Exp::Subrule' ]
1325
1326 .sub 'perl6exp' :method
1327     .param pmc pad
1328
1329     $I0 = self['isquant']
1330     if $I0 goto backtrack_done
1331     $I0 = exists self['backtrack']
1332     if $I0 goto backtrack_done
1333     $I0 = pad['ratchet']
1334     if $I0 == 0 goto backtrack_done
1335     self['backtrack'] = PGE_BACKTRACK_NONE
1336   backtrack_done:
1337
1338     .local int iscapture, isarray
1339     .local pmc lexscope
1340     iscapture = self['iscapture']
1341     if iscapture == 0 goto end
1342     .local string cname
1343     cname = self['cname']
1344     isarray = pad['isarray']
1345     lexscope = pad['lexscope']
1346     $I0 = exists lexscope[cname]
1347     if $I0 == 0 goto lexscope_1
1348     $P0 = lexscope[cname]
1349     $P0['isarray'] = 1
1350     isarray = 1
1351   lexscope_1:
1352     lexscope[cname] = self
1353     self['isarray'] = isarray
1354   next_cname:
1355     $S0 = substr cname, 0, 1
1356     if $S0 == '"' goto end
1357     $I0 = cname
1358     inc $I0
1359     pad['subpats'] = $I0
1360   end:
1361     .return (self)
1362 .end
1363
1364
1365 .namespace [ 'PGE::Exp::WS' ]
1366
1367 .sub 'perl6exp' :method
1368     .param pmc pad
1369
1370     $I0 = pad['sigspace']
1371     if $I0 goto end
1372     null $P0
1373     .return ($P0)
1374   end:
1375     self['subname'] = 'ws'
1376     self['iscapture'] = 0
1377     $I0 = pad['ratchet']
1378     unless $I0 goto end_1
1379     self['backtrack'] = PGE_BACKTRACK_NONE
1380   end_1:
1381     .return (self)
1382 .end
1383
1384
1385 .namespace [ 'PGE::Exp::Alt' ]
1386
1387 .sub 'perl6exp' :method
1388     .param pmc pad
1389
1390     .local pmc exp0, exp1
1391     exp0 = self[0]
1392     exp1 = self[1]
1393
1394     ##   if we only have one operand (prefix:|),
1395     ##   reduce and return it.
1396     $I0 = defined self[1]
1397     if $I0 goto with_rhs
1398     .return exp0.'perl6exp'(pad)
1399   with_rhs:
1400
1401     ##   if lhs is whitespace, then this is a prefix-alt and
1402     ##   we ignore it (by simply returning its rhs)
1403     $I0 = isa exp0, 'PGE::Exp::WS'
1404     if $I0 == 0 goto with_lhs
1405     .return exp1.'perl6exp'(pad)
1406   with_lhs:
1407
1408     .local pmc lexscope, savescope, iter
1409     lexscope = pad['lexscope']
1410     savescope = new 'Hash'
1411     iter = new 'Iterator', lexscope
1412   iter_loop:
1413     unless iter goto iter_end
1414     $P1 = shift iter
1415     $P2 = iter[$P1]
1416     savescope[$P1] = $P2
1417     goto iter_loop
1418   iter_end:
1419     $I0 = pad['subpats']
1420     exp0 = exp0.'perl6exp'(pad)
1421     self[0] = exp0
1422
1423     $I1 = pad['subpats']
1424     pad['subpats'] = $I0
1425     pad['lexscope'] = savescope
1426     exp1 = exp1.'perl6exp'(pad)
1427     self[1] = exp1
1428     $I0 = pad['subpats']
1429     if $I0 >= $I1 goto end
1430     pad['subpats'] = $I1
1431   end:
1432     .return (self)
1433 .end
1434
1435
1436 .namespace [ 'PGE::Exp::Alias' ]
1437
1438 .sub 'perl6exp' :method
1439     .param pmc pad
1440     .local string cname
1441     .local pmc exp0, exp1
1442
1443     exp0 = self[0]
1444     cname = exp0['cname']
1445     exp1 = self[1]
1446
1447     ##   If we're aliasing a capture group or a quantified capture
1448     ##   group, then we just move the alias name to that group.
1449     ##   Otherwise, we need to create a capture group for this
1450     ##   alias and return that.
1451
1452     $I0 = isa exp1, 'PGE::Exp::CGroup'
1453     if $I0 == 1 goto make_alias
1454     $I0 = isa exp1, 'PGE::Exp::Subrule'
1455     if $I0 == 1 goto make_alias
1456     $I0 = isa exp1, 'PGE::Exp::Quant'
1457     if $I0 == 0 goto add_cgroup
1458     $P0 = exp1[0]
1459     $I0 = isa $P0, 'PGE::Exp::CGroup'
1460     if $I0 == 0 goto add_cgroup
1461     $P0['cname'] = cname
1462     goto end
1463
1464   add_cgroup:
1465     .local pmc cexp
1466     cexp = self.'new'(self, 'grammar'=>'PGE::Exp::CGroup')
1467     $I0 = self.from()
1468     cexp.'from'($I0)
1469     $I0 = self.to()
1470     cexp.'to'($I0)
1471     cexp[0] = exp1
1472     cexp['isscope'] = 0
1473     cexp['iscapture'] = 1
1474     cexp['cname'] = cname
1475     cexp = cexp.'perl6exp'(pad)
1476     .return (cexp)
1477
1478   make_alias:
1479     exp1['cname'] = cname
1480     exp1['iscapture'] = 1
1481   end:
1482     exp1 = exp1.'perl6exp'(pad)
1483     .return (exp1)
1484 .end
1485
1486
1487 .namespace [ 'PGE::Exp::Modifier' ]
1488
1489 .sub 'perl6exp' :method
1490     .param pmc pad
1491     .local string key
1492     .local string value
1493     key = self['key']
1494     value = self
1495     if key == 'words' goto sigspace
1496     if key == 's' goto sigspace
1497     if key == 'w' goto sigspace
1498     if key == 'i' goto ignorecase
1499     goto setpad
1500   sigspace:
1501     key = 'sigspace'
1502     goto setpad
1503   ignorecase:
1504     key = 'ignorecase'
1505   setpad:
1506     $P0 = pad[key]
1507     pad[key] = value
1508     .local pmc exp
1509     exp = self[0]
1510     exp = exp.'perl6exp'(pad)
1511     self[0] = exp
1512     pad[key] = $P0
1513     .return (exp)
1514 .end
1515
1516 .namespace [ 'PGE::Exp::Conj' ]
1517
1518 .sub 'perl6exp' :method
1519     .param pmc pad
1520     $P0 = self[0]
1521     $P0 = $P0.'perl6exp'(pad)
1522     self[0] = $P0
1523     $P1 = self[1]
1524     $P1 = $P1.'perl6exp'(pad)
1525     self[1] = $P1
1526     .return (self)
1527 .end
1528
1529
1530 .namespace [ 'PGE::Exp::Closure' ]
1531
1532 .sub 'perl6exp' :method
1533     .param pmc pad
1534     .local string lang
1535     .local pmc closure_pp
1536     .local pmc closure_fn
1537     lang = pad['lang']
1538     self['lang'] = lang
1539     # see if we need to do any pre-processing of the closure
1540     closure_pp = get_hll_global ['PGE::Perl6Regex'], '%closure_pp'
1541     $I0 = defined closure_pp[lang]
1542     if $I0 == 0 goto end
1543     closure_fn = closure_pp[lang]
1544     $S1 = self
1545     $S1 = closure_fn($S1)
1546     self.'result_object'($S1)
1547   end:
1548     .return (self)
1549 .end
1550
1551 =back
1552
1553 =head1 Functions
1554
1555 =over 4
1556
1557 =item C<PIR_closure(string code)>
1558
1559 This helper function helps with :lang(PIR) closures in rules
1560 by adding a ".sub" wrapper around the code if one isn't
1561 already present.
1562
1563 =back
1564 =cut
1565
1566 .namespace [ 'PGE::Perl6Regex' ]
1567
1568 .sub 'PIR_closure'
1569     .param string code
1570     $I0 = index code, '.sub'
1571     if $I0 >= 0 goto end
1572     code = concat ".sub anon :anon\n.param pmc match\n", code
1573     code .= "\n.end\n"
1574   end:
1575     .return (code)
1576 .end
1577
1578
1579 .namespace [ 'PGE::Exp::Action' ]
1580
1581 .sub 'perl6exp' :method
1582     .param pmc pad
1583     $S0 = pad['name']
1584     self['actionname'] = $S0
1585     .return (self)
1586 .end
1587
1588
1589 .namespace [ 'PGE::Exp::Cut' ]
1590
1591 .sub 'perl6exp' :method
1592     .param pmc pad
1593     $S0 = self
1594     if $S0 == ':::' goto cut_rule
1595     if $S0 == '<commit>' goto cut_match
1596     self['cutmark'] = PGE_CUT_GROUP
1597     .return (self)
1598   cut_rule:
1599     self['cutmark'] = PGE_CUT_RULE
1600     .return (self)
1601   cut_match:
1602     self['cutmark'] = PGE_CUT_MATCH
1603     .return (self)
1604 .end
1605
1606 # Local Variables:
1607 #   mode: pir
1608 #   fill-column: 100
1609 # End:
1610 # vim: expandtab shiftwidth=4 ft=pir: