compilers/pct/src/PCT/Grammar.pir

   1 # Copyright (C) 2007-2008, The Perl Foundation.
   2 # $Id$
   3
   4 =head1 NAME
   5
   6 PCT::Grammar - base grammar with useful rules
   7
   8 =head1 SYNOPSIS
   9
  10     grammar MyGrammar is PCT::Grammar;
  11
  12     rule abc { [ word | <panic: word not found> ] }
  13
  14     rule quote {
  15         [ \' <string_literal: '> \'
  16         | \" <string_literal: "> \"
  17         ]
  18     }
  19
  20 =head1 DESCRIPTION
  21
  22 This file implements C<PCT::Grammar>, which is a basic grammar object
  23 with a few useful methods for parsing thrown in.
  24
  25 =head2 Methods
  26
  27 =over 4
  28
  29 =item panic(match [, message, ...] )
  30
  31 Throws an exception at the current point of the match, with message
  32 as part of the exception payload.  The message doesn't end with
  33 a newline, then the line number and offset of the match are
  34 also included.
  35
  36 =cut
  37
  38 .namespace [ 'PCT::Grammar' ]
  39
  40 .sub 'onload' :anon :init :load
  41     load_bytecode 'PGE.pbc'
  42     load_bytecode 'PGE/Util.pbc'
  43     .local pmc p6meta
  44     p6meta = new 'P6metaclass'
  45     p6meta.'new_class'('PCT::Grammar', 'parent'=>'PGE::Grammar')
  46     $P0 = get_class 'PCT::Grammar'
  47     $P1 = get_hll_global ['PGE::Util'], 'die'
  48     $P0.'add_method'('panic', $P1)
  49     .return ()
  50 .end
  51
  52
  53 =item item()
  54
  55 Here we overload the item() method from PGE::Match to
  56 throw an exception if a result object hasn't been set.
  57
  58 =cut
  59
  60 .sub 'item' :method
  61     .local pmc obj
  62     obj = getattribute self, '$!item'
  63     unless null obj goto end
  64     die "No result object"
  65   end:
  66     .return (obj)
  67 .end
  68
  69
  70 =item ww()
  71
  72 Special-purpose rule to return true if we're in the middle
  73 of a word -- i.e., if the previous and next character are
  74 both "word characters".  This is roughly equivalent to
  75 C<< <?after \w><?before \w> >> except it's much quicker.
  76 In particular, C<< <!ww> >> can be used by :sigspace rules
  77 to enforce whitespace between lexical words.
  78
  79 =cut
  80
  81 .include 'cclass.pasm'
  82
  83 .sub 'ww' :method
  84     .local pmc mob
  85     .local int pos
  86     .local string target
  87     $P0 = get_hll_global ['PGE'], 'Match'
  88     (mob, pos, target) = $P0.'new'(self)
  89     if pos == 0 goto fail
  90     $I0 = is_cclass .CCLASS_WORD, target, pos
  91     unless $I0 goto fail
  92     $I1 = pos - 1
  93     $I0 = is_cclass .CCLASS_WORD, target, $I1
  94     unless $I0 goto fail
  95     mob.'to'(pos)
  96   fail:
  97     .return (mob)
  98 .end
  99
 100
 101 .sub 'string_literal' :method
 102     .param string stop
 103     .param pmc adverbs         :slurpy :named
 104
 105     ##  create a new match object, get the new match position
 106     .local pmc mob
 107     .local int pos, lastpos, stoplen
 108     .local string target, escapechars
 109     (mob, pos, target) = self.'new'(self)
 110     lastpos = length target
 111     stoplen = length stop
 112     $S0 = substr stop, 0, 1
 113     escapechars = concat "\\", $S0
 114
 115     ##  leave space for close delimiter
 116     lastpos -= stoplen
 117
 118     ##  now initialize and loop through target
 119   literal_init:
 120     .local string literal, litchar
 121     literal = ''
 122
 123   literal_loop:
 124     ##  if we're beyond the last possible position, fail
 125     if pos > lastpos goto fail
 126
 127     ##  if ending delimiter, then we're done
 128     $S0 = substr target, pos, stoplen
 129     if $S0 == stop goto literal_end
 130     if pos >= lastpos goto fail
 131
 132     ##  get next character in literal
 133     litchar = substr target, pos, 1
 134     inc pos
 135
 136     ##  add non-escape characters to literal
 137     if litchar != "\\" goto add_litchar
 138
 139     ##  look at the next character, if it's always escaped, add it and
 140     ##  move on
 141     .local string escaped
 142     escaped = substr target, pos, 1
 143     $I0 = index escapechars, escaped
 144     if $I0 < 0 goto interpolated_escape
 145     inc pos
 146     literal .= escaped
 147     goto literal_loop
 148
 149   interpolated_escape:
 150     ##  if not double-quoted delim, no interpolation
 151     if stop != '"' goto add_litchar
 152     litchar = escaped
 153     inc pos
 154     $I0 = index "abefnrt0xdo", litchar
 155     if $I0 < 0 goto add_litchar
 156
 157     ##  if it's one of "xdo", then handle that specially
 158     if $I0 >= 8 goto literal_xdo
 159
 160     litchar = substr "\a\b\e\f\n\r\t\0", $I0, 1
 161     goto add_litchar
 162
 163   literal_xdo:
 164     ##  handle \x, \d, and \o escapes.  start by converting
 165     ##  the 'o', 'd', or 'x' into 8, 10, or 16 (yes, it's hack
 166     ##  but it works).  Then loop through the characters that
 167     ##  follow to compute the integer value of the codepoint,
 168     ##  and add that codepoint to our literal.
 169     .local int base, codepoint, isbracketed
 170     base = index '        o d     x', litchar
 171     codepoint = 0
 172     $S0 = substr target, pos, 1
 173     isbracketed = iseq $S0, '['
 174     pos += isbracketed
 175   literal_xdo_char_loop:
 176     $S0 = substr target, pos, 1
 177     $I0 = index '0123456789abcdef', $S0
 178     if $I0 < 0 goto literal_xdo_char_end
 179     if $I0 >= base goto literal_xdo_char_end
 180     codepoint *= base
 181     codepoint += $I0
 182     inc pos
 183     goto literal_xdo_char_loop
 184   literal_xdo_char_end:
 185     $S1 = chr codepoint
 186     concat literal, $S1
 187     unless isbracketed goto literal_xdo_end
 188     if $S0 == ']' goto literal_xdo_end
 189     if $S0 != ',' goto fail
 190     inc pos
 191     codepoint = 0
 192     goto literal_xdo_char_loop
 193   literal_xdo_end:
 194     pos += isbracketed
 195     goto literal_loop
 196
 197   add_litchar:
 198     literal .= litchar
 199     goto literal_loop
 200
 201   literal_end:
 202     mob.'to'(pos)
 203     mob.'result_object'(literal)
 204     .return (mob)
 205
 206   fail:
 207     mob.'to'(-1)
 208     .return (mob)
 209 .end
 210
 211 # Local Variables:
 212 #   mode: pir
 213 #   fill-column: 100
 214 # End:
 215 # vim: expandtab shiftwidth=4 ft=pir: