1 # Copyright (C) 2004-2008, The Perl Foundation.
4 package Parrot
::Pmc2c
::Parser
;
9 use base
qw( Exporter );
11 our @EXPORT_OK = qw( parse_pmc extract_balanced );
12 use Parrot
::Pmc2c
::PMC
();
13 use Parrot
::Pmc2c
::Attribute
;
14 use Parrot
::Pmc2c
::Method
();
15 use Parrot
::Pmc2c
::Emitter
();
16 use Parrot
::Pmc2c
::UtilFunctions
qw(count_newlines filename slurp);
17 use Text
::Balanced
'extract_bracketed';
21 Parrot::Pmc2c::Parser - PMC Parser
25 use Parrot::Pmc2c::Parser;
29 Parrot::Pmc2c::Parser parses a sudo C syntax into a perl hash that is then dumped.
34 $parsed_pmc_hash = parse_pmc($pmc2cMain, $filename);
36 B<Purpose:> Parse PMC code and return a hash ref of pmc attributes.
38 B<Arguments:> List of two arguments:
48 Filename of the pmc to parse.
52 B<Return Values:> Reference to a Parrot::Pmc2c::PMC object
54 B<Comments:> Called by C<dump_pmc()>.
59 my ( $pmc2cMain, $filename ) = @_;
62 $filename = $pmc2cMain->find_file( filename
( $filename, '.pmc' ), 1 );
63 my $code = slurp
($filename);
65 my ( $preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines ) =
66 parse_top_level
($code);
68 my $pmc = Parrot
::Pmc2c
::PMC
->create($pmcname);
69 $pmc->preamble( Parrot
::Pmc2c
::Emitter
->text( $preamble, $filename, 1 ) );
71 $pmc->set_filename($filename);
72 $pmc->set_flags($flags);
73 $pmc->set_parents($parents);
75 # the +1 puts us on the current line
76 my $lineno = count_newlines
($preamble) + $chewed_lines + 1;
79 ($lineno, $pmcbody) = find_attrs
( $pmc, $pmcbody, $lineno, $filename);
80 ($lineno, $class_init) = find_methods
($pmc, $pmcbody, $lineno, $filename);
82 $pmc->postamble( Parrot
::Pmc2c
::Emitter
->text( $post, $filename, $lineno ) );
84 # ensure class_init is the last method in the method list
85 $pmc->add_method($class_init) if $class_init;
86 $pmc->vtable( $pmc2cMain->read_dump("vtable.pmc") );
87 $pmc->pre_method_gen();
93 my ($pmc, $pmcbody, $lineno, $filename) = @_;
95 # backreferences here are all +1 because below the qr is wrapped in quotes
99 [;\n\s
]* # blank spaces and spurious semicolons
100 (?
:/\*.*?\*/)?
# C comments
108 (INTVAL
|FLOATVAL
|STRING\s
+\
*|PMC\s
+\
*|\w
+\s
+\
*|Parrot_\w
*)
118 # declaration terminator
126 while ($pmcbody =~ s/($attr_re)//) {
127 my ($type, $name, @modifiers, $comment);
130 @modifiers = split /\s/, $4;
135 $pmc->add_attribute(Parrot
::Pmc2c
::Attribute
->new(
139 modifiers
=> \
@modifiers,
144 return ($lineno, $pmcbody);
148 my ($pmc, $pmcbody, $lineno, $filename) = @_;
151 # backreferences here are all +1 because below the qr is wrapped in quotes
152 my $signature_re = qr{
155 [;\n\s
]* # blank spaces and spurious semicolons
156 (?
:/\*.*?\*/)?
# C comments
159 ((?
:PARROT_\w
+\s
+)+)?
# decorators
161 # vtable|method marker
162 (?
:(VTABLE
|METHOD
)\s
+)?
164 ((?
:\w
+\s
*?\
**\s
*)?\w
+) # method name (includes return type)
167 (?
:\w
+\s
*\
*?
# type (pointer optional)
172 (?
:\
("[^\)]+"\
))?
# with optional parameter
174 ,?\s
* # probably a comma and whitespace
175 )* # zero or more of these bad boys
178 ((?
::(\w
+)\s
*)*) # method attrs
182 while ( $pmcbody =~ s/($signature_re)// ) {
183 my ( $decorators, $marker, $methodname, $parameters, $rawattrs ) =
184 ( $2, $3, $4, $5, $6 );
185 my $attrs = defined $rawattrs ? parse_method_attrs
($rawattrs) : {};
186 $lineno += count_newlines
($1);
190 if ($methodname =~ /(.*\s+\*?)(\w+)/) {
191 ($returntype, $methodname) = ($1, $2);
194 ( my $methodblock, $pmcbody ) = extract_balanced
($pmcbody);
196 $methodblock = strip_outer_brackets
($methodblock);
198 # remove pmclass 4 space indent
199 $methodblock =~ s/^[ ]{4}//mg;
201 # trim trailing ws from last line
202 $methodblock =~ s/\n[\t ]+$/\n/g;
205 $decorators =~ s/^\s*(.*?)\s*$/$1/s;
206 $decorators = [ split /\s+/ => $decorators ];
208 $returntype = 'void' if (defined $marker && $marker eq 'METHOD');
210 my $method = Parrot
::Pmc2c
::Method
->new(
213 parent_name
=> $pmc->name,
214 body
=> Parrot
::Pmc2c
::Emitter
->text( $methodblock, $filename, $lineno ),
215 return_type
=> $returntype,
216 parameters
=> $parameters,
217 type
=> Parrot
::Pmc2c
::Method
::VTABLE
,
219 decorators
=> $decorators,
223 # METHOD needs FixedIntegerArray header
224 if ( $marker and $marker =~ /METHOD/ ) {
225 Parrot
::Pmc2c
::PCCMETHOD
::rewrite_pccmethod
( $method, $pmc );
226 $pmc->set_flag('need_fia_header');
229 # PCCINVOKE needs FixedIntegerArray header
230 $pmc->set_flag('need_fia_header') if $methodblock =~ /PCCINVOKE/;
232 # the class_init method is added last after all other methods
233 if ( $methodname eq 'class_init' ) {
234 $class_init = $method;
238 # Name-mangle NCI methods to avoid conflict with vtable methods.
239 if ( $marker and $marker !~ /VTABLE/ ) {
240 $method->type(Parrot
::Pmc2c
::Method
::NON_VTABLE
);
241 $method->name("nci_$methodname");
242 $method->symbol($methodname);
245 parse_mmds
( $method, $filename, $lineno )
246 if $methodblock =~ /\bMMD_(\w+):/;
248 $pmc->add_method($method);
251 $lineno += count_newlines
($methodblock);
254 return ($lineno, $class_init);
258 my ( $method, $filename, $lineno ) = @_;
259 my $mmd_methods = [];
260 my $body_text = $method->body;
261 my $default_body = $body_text;
262 my $default_body_lineno = $lineno;
264 # now split into MMD if necessary:
265 while ( $body_text =~ s/(\bMMD_(\w+):\s*)// ) {
267 $lineno += count_newlines
($1);
270 $method->add_mmd_rights($right_type);
272 ( my $mmd_part, $body_text ) = extract_bracketed_body_text
( $body_text, '{' );
274 die "Empty MMD body near '$body_text'" unless $mmd_part;
275 my $mmd_part_lines = count_newlines
($mmd_part);
277 # remove whitespace at end of last line
278 $mmd_part =~ s/\n\s*$/\n/s;
280 if ( $right_type eq 'DEFAULT' ) {
281 $default_body = $mmd_part;
282 $default_body_lineno = $lineno;
285 my $mmd_method = Parrot
::Pmc2c
::Method
->new(
287 name
=> $method->name . "_$right_type",
288 parent_name
=> $method->parent_name,
289 body
=> Parrot
::Pmc2c
::Emitter
->text( $mmd_part, $filename, $lineno ),
290 return_type
=> $method->return_type,
291 parameters
=> $method->parameters,
292 type
=> Parrot
::Pmc2c
::Method
::VTABLE
,
293 attrs
=> $method->attrs,
294 right
=> $right_type,
298 push @
{$mmd_methods}, $mmd_method;
301 $lineno += $mmd_part_lines;
303 $method->mmds($mmd_methods);
304 $method->body( Parrot
::Pmc2c
::Emitter
->text( $default_body, $filename, $default_body_lineno ) );
307 sub strip_outer_brackets
{
308 my ($method_body) = @_;
309 die "First character in $method_body is not a {"
310 unless substr( $method_body, 0, 1 ) eq '{';
312 die "Last character in $method_body is not a }"
313 unless substr( $method_body, -1, 1 ) eq '}';
315 return substr $method_body, 1, -1;
318 sub extract_bracketed_body_text
{
319 my ( $body_text, $bracketed ) = @_;
320 my ( $extracted, $remaining ) = extract_bracketed
( $body_text, $bracketed );
321 return ( strip_outer_brackets
($extracted), $remaining );
324 =head2 C<parse_top_level()>
326 my ($preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines)
327 = parse_top_level(\$code);
329 B<Purpose:> Extract a pmc signature from the code ref.
331 B<Argument:> PMC file contents slurped by C<parse_pmc()>.
333 B<Return Values:> List of seven elements:
339 the code found before the pmc signature;
347 a hash ref containing the flags associated with the pmc (such as
348 C<extends> and C<provides>).
352 the list of parents this pmc extends
360 the code found after the pmc body
364 number of newlines in the pmc signature that need to be added to the
365 running total of lines in the file
369 B<Comments:> Called internally by C<parse_pmc()>.
373 sub parse_top_level
{
376 my $top_level_re = qr{
377 ^ # beginning of line
382 pmclass
# pmclass keyword
385 ((?
:\s
+\w
+)*) # pmc attributes
388 \
{ # pmc body beginning marker
390 $code =~ s
[$top_level_re][{]smx
or die "No pmclass found\n";
391 my ( $preamble, $pmc_signature, $pmcname, $attributes ) = ( $1, $2, $3, $4 );
393 my $chewed_lines = count_newlines
($pmc_signature);
394 my ( $flags, $parents ) = parse_flags
( $attributes, $pmcname );
395 my ( $body, $postamble ) = extract_balanced
($code);
398 $body = strip_outer_brackets
($body);
400 return ( $preamble, $pmcname, $flags, $parents, $body, $postamble, $chewed_lines );
403 our %has_value = map { $_ => 1 } qw(does group hll);
404 our %has_values = map { $_ => 1 } qw(provides extends maps lib);
406 =head2 C<parse_flags()>
408 my ($flags, $parents) = parse_flags($attributes, $pmcname);
410 B<Purpose:> Extract a pmc signature from the code ref.
412 B<Argument:> PMC file contents slurped by C<parse_pmc()>.
414 B<Return Values:> List of two elements:
420 a hash ref containing the flags associated with the pmc (such as
421 C<extends> and C<provides>).
425 the list of parents this pmc extends
429 B<Comments:> Called internally by C<parse_top_level()>.
434 my ( $data, $pmcname ) = @_;
436 my ( $flags, @parents );
438 my @words = $data =~ /(\w+)/g;
441 my $name = shift @words;
442 if ( $has_value{$name} || $has_values{$name} ) {
443 my $value = shift @words;
444 die "Parser error: no value for '$name'" unless $value;
446 if ( $name eq 'extends' ) {
447 push @parents, $value;
449 elsif ( $has_values{$name} ) {
450 $flags->{$name}{$value} = 1;
453 $flags->{$name} = $value;
461 # setup some defaults
462 if ( $pmcname ne 'default' ) {
463 push @parents, 'default' unless @parents;
464 $flags->{provides
}{scalar} = 1 unless $flags->{provides
};
467 return ( $flags, \
@parents );
470 =head2 C<extract_balanced()>
472 ($pmcbody, $post) = extract_balanced($code);
474 B<Purpose:> Remove a balanced C<{}> construct from the beginning of C<$code>.
475 Return it and the remaining code.
477 B<Argument:> The code ref which was the first argument provided to
480 B<Return Values:> List of two elements:
486 String beginning with C<{> and ending with C<}>. In between is found C code
487 where the comments hold strings of Perl comments written in POD.
491 String holding the balance of the code. Same style as first element, but
496 B<Comments:> Called twice within C<parse_pmc()>. Will die with error message
497 C<Badly balanced> if not balanced.
501 sub extract_balanced
{
505 die "Unexpected whitespace, expecting" if $code =~ /^\s+/;
506 die "bad block open: ", substr( $code, 0, 40 ), "..." unless $code =~ /^\{/;
508 # create a copy and remove strings and comments so that
509 # unbalanced {} can be used in them in PMCs, being careful to
510 # preserve string length.
513 ( ' (?: \\. | [^'] )* ' # remove ' strings
514 | " (?: \\. | [^"] )* " # remove " strings
515 | /\* .*? \*/ ) # remove C comments
517 [ "-" x
length $1 ]sexg
;
519 while (/ (\{) | (\}) /gx) {
525 return ( substr( $code, 0, pos, "" ), $code ) if not $unbalanced;
529 die "Badly balanced PMC source\n" if $unbalanced;
533 =head2 C<parse_method_attrs()>
535 $attrs = parse_method_attrs($method_attributes);
537 B<Purpose:> Parse a list of method attributes and return a hash ref of them.
539 B<Arguments:> String captured from regular expression.
541 B<Return Values:> Reference to hash of attribute values.
543 B<Comments:> Called within C<parse_pmc()>.
547 sub parse_method_attrs
{
551 ++$result{$1} while $flags =~ /:(\w+)/g;
560 # cperl-indent-level: 4
563 # vim: expandtab shiftwidth=4: