tagged release 0.7.1
[parrot.git] / lib / Parrot / Pmc2c / Parser.pm
blob48d6b911c475cb13ac7736c365e8ef75cc4fe61c
1 # Copyright (C) 2004-2008, The Perl Foundation.
2 # $Id$
4 package Parrot::Pmc2c::Parser;
6 use strict;
7 use warnings;
9 use base qw( Exporter );
11 our @EXPORT_OK = qw( parse_pmc extract_balanced );
12 use Parrot::Pmc2c::PMC ();
13 use Parrot::Pmc2c::Attribute;
14 use Parrot::Pmc2c::Method ();
15 use Parrot::Pmc2c::Emitter ();
16 use Parrot::Pmc2c::UtilFunctions qw(count_newlines filename slurp);
17 use Text::Balanced 'extract_bracketed';
19 =head1 NAME
21 Parrot::Pmc2c::Parser - PMC Parser
23 =head1 SYNOPSIS
25 use Parrot::Pmc2c::Parser;
27 =head1 DESCRIPTION
29 Parrot::Pmc2c::Parser parses a sudo C syntax into a perl hash that is then dumped.
32 =head2 C<parse_pmc()>
34 $parsed_pmc_hash = parse_pmc($pmc2cMain, $filename);
36 B<Purpose:> Parse PMC code and return a hash ref of pmc attributes.
38 B<Arguments:> List of two arguments:
40 =over 4
42 =item *
44 The pmc2cMain object
46 =item *
48 Filename of the pmc to parse.
50 =back
52 B<Return Values:> Reference to a Parrot::Pmc2c::PMC object
54 B<Comments:> Called by C<dump_pmc()>.
56 =cut
58 sub parse_pmc {
59 my ( $pmc2cMain, $filename ) = @_;
61 #slurp file contents
62 $filename = $pmc2cMain->find_file( filename( $filename, '.pmc' ), 1 );
63 my $code = slurp($filename);
65 my ( $preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines ) =
66 parse_top_level($code);
68 my $pmc = Parrot::Pmc2c::PMC->create($pmcname);
69 $pmc->preamble( Parrot::Pmc2c::Emitter->text( $preamble, $filename, 1 ) );
70 $pmc->name($pmcname);
71 $pmc->set_filename($filename);
72 $pmc->set_flags($flags);
73 $pmc->set_parents($parents);
75 # the +1 puts us on the current line
76 my $lineno = count_newlines($preamble) + $chewed_lines + 1;
77 my $class_init;
79 ($lineno, $pmcbody) = find_attrs( $pmc, $pmcbody, $lineno, $filename);
80 ($lineno, $class_init) = find_methods($pmc, $pmcbody, $lineno, $filename);
82 $pmc->postamble( Parrot::Pmc2c::Emitter->text( $post, $filename, $lineno ) );
84 # ensure class_init is the last method in the method list
85 $pmc->add_method($class_init) if $class_init;
86 $pmc->vtable( $pmc2cMain->read_dump("vtable.pmc") );
87 $pmc->pre_method_gen();
89 return $pmc;
92 sub find_attrs {
93 my ($pmc, $pmcbody, $lineno, $filename) = @_;
95 # backreferences here are all +1 because below the qr is wrapped in quotes
96 my $attr_re = qr{
98 (?:
99 [;\n\s]* # blank spaces and spurious semicolons
100 (?:/\*.*?\*/)? # C comments
103 # attribute marker
104 ATTR
106 # type
108 (INTVAL|FLOATVAL|STRING\s+\*|PMC\s+\*|(?:struct\s+)?\w+\s+\*+|Parrot_\w*)
110 # name
112 (\w+)
114 # modifiers
116 ((?::\w+\s*)*)
118 # declaration terminator
121 # optional comment
123 (/\*.*?\*/)?
124 }sx;
126 while ($pmcbody =~ s/($attr_re)//) {
127 my ($type, $name, @modifiers, $comment);
128 $type = $2;
129 $name = $3;
130 @modifiers = split /\s/, $4;
131 $comment = $5;
133 $lineno++;
135 $pmc->add_attribute(Parrot::Pmc2c::Attribute->new(
137 name => $name,
138 type => $type,
139 modifiers => \@modifiers,
144 return ($lineno, $pmcbody);
147 sub find_methods {
148 my ($pmc, $pmcbody, $lineno, $filename) = @_;
149 my $class_init = 0;
151 # backreferences here are all +1 because below the qr is wrapped in quotes
152 my $signature_re = qr{
155 [;\n\s]* # blank spaces and spurious semicolons
156 (?:/\*.*?\*/)? # C comments
159 ((?:PARROT_\w+\s+)+)? # decorators
161 # vtable|method marker
162 (?:(VTABLE|METHOD)\s+)?
164 ((?:\w+\s*?\**\s*)?\w+) # method name (includes return type)
166 \(( # parameters
167 (?:\w+\s*\*? # type (pointer optional)
169 \w+ # argument name
171 \s+:\w+ # attribute
172 (?:\("[^\)]+"\))? # with optional parameter
174 ,?\s* # probably a comma and whitespace
175 )* # zero or more of these bad boys
178 ((?::(\w+)\s*)*) # method attrs
180 }sx;
182 while ( $pmcbody =~ s/($signature_re)// ) {
183 my ( $decorators, $marker, $methodname, $parameters, $rawattrs ) =
184 ( $2, $3, $4, $5, $6 );
185 my $attrs = defined $rawattrs ? parse_method_attrs($rawattrs) : {};
186 $lineno += count_newlines($1);
188 my $returntype = '';
190 if ($methodname =~ /(.*\s+\*?)(\w+)/) {
191 ($returntype, $methodname) = ($1, $2);
194 ( my $methodblock, $pmcbody ) = extract_balanced($pmcbody);
196 $methodblock = strip_outer_brackets($methodblock);
198 # remove pmclass 4 space indent
199 $methodblock =~ s/^[ ]{4}//mg;
201 # trim trailing ws from last line
202 $methodblock =~ s/\n[\t ]+$/\n/g;
204 $decorators ||= '';
205 $decorators =~ s/^\s*(.*?)\s*$/$1/s;
206 $decorators = [ split /\s+/ => $decorators ];
208 $returntype = 'void' if (defined $marker && $marker eq 'METHOD');
210 my $method = Parrot::Pmc2c::Method->new(
212 name => $methodname,
213 parent_name => $pmc->name,
214 body => Parrot::Pmc2c::Emitter->text( $methodblock, $filename, $lineno ),
215 return_type => $returntype,
216 parameters => $parameters,
217 type => Parrot::Pmc2c::Method::VTABLE,
218 attrs => $attrs,
219 decorators => $decorators,
223 # METHOD needs FixedIntegerArray header
224 if ( $marker and $marker =~ /METHOD/ ) {
225 Parrot::Pmc2c::PCCMETHOD::rewrite_pccmethod( $method, $pmc );
226 $pmc->set_flag('need_fia_header');
229 # PCCINVOKE needs FixedIntegerArray header
230 $pmc->set_flag('need_fia_header') if $methodblock =~ /PCCINVOKE/;
232 # the class_init method is added last after all other methods
233 if ( $methodname eq 'class_init' ) {
234 $class_init = $method;
236 else {
238 # Name-mangle NCI methods to avoid conflict with vtable methods.
239 if ( $marker and $marker !~ /VTABLE/ ) {
240 $method->type(Parrot::Pmc2c::Method::NON_VTABLE);
241 $method->name("nci_$methodname");
242 $method->symbol($methodname);
245 parse_mmds( $method, $filename, $lineno )
246 if $methodblock =~ /\bMMD_(\w+):/;
248 $pmc->add_method($method);
251 $lineno += count_newlines($methodblock);
254 return ($lineno, $class_init);
257 sub parse_mmds {
258 my ( $method, $filename, $lineno ) = @_;
259 my $mmd_methods = [];
260 my $body_text = $method->body;
261 my $default_body = $body_text;
262 my $default_body_lineno = $lineno;
264 # now split into MMD if necessary:
265 while ( $body_text =~ s/(\bMMD_(\w+):\s*)// ) {
267 $lineno += count_newlines($1);
268 my $right_type = $2;
270 $method->add_mmd_rights($right_type);
272 ( my $mmd_part, $body_text ) = extract_bracketed_body_text( $body_text, '{' );
274 die "Empty MMD body near '$body_text'" unless $mmd_part;
275 my $mmd_part_lines = count_newlines($mmd_part);
277 # remove whitespace at end of last line
278 $mmd_part =~ s/\n\s*$/\n/s;
280 if ( $right_type eq 'DEFAULT' ) {
281 $default_body = $mmd_part;
282 $default_body_lineno = $lineno;
284 else {
285 my $mmd_method = Parrot::Pmc2c::Method->new(
287 name => $method->name . "_$right_type",
288 parent_name => $method->parent_name,
289 body => Parrot::Pmc2c::Emitter->text( $mmd_part, $filename, $lineno ),
290 return_type => $method->return_type,
291 parameters => $method->parameters,
292 type => Parrot::Pmc2c::Method::VTABLE,
293 attrs => $method->attrs,
294 right => $right_type,
298 push @{$mmd_methods}, $mmd_method;
301 $lineno += $mmd_part_lines;
303 $method->mmds($mmd_methods);
304 $method->body( Parrot::Pmc2c::Emitter->text( $default_body, $filename, $default_body_lineno ) );
307 sub strip_outer_brackets {
308 my ($method_body) = @_;
309 die "First character in $method_body is not a {"
310 unless substr( $method_body, 0, 1 ) eq '{';
312 die "Last character in $method_body is not a }"
313 unless substr( $method_body, -1, 1 ) eq '}';
315 return substr $method_body, 1, -1;
318 sub extract_bracketed_body_text {
319 my ( $body_text, $bracketed ) = @_;
320 my ( $extracted, $remaining ) = extract_bracketed( $body_text, $bracketed );
321 return ( strip_outer_brackets($extracted), $remaining );
324 =head2 C<parse_top_level()>
326 my ($preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines)
327 = parse_top_level(\$code);
329 B<Purpose:> Extract a pmc signature from the code ref.
331 B<Argument:> PMC file contents slurped by C<parse_pmc()>.
333 B<Return Values:> List of seven elements:
335 =over 4
337 =item *
339 the code found before the pmc signature;
341 =item *
343 the name of the pmc
345 =item *
347 a hash ref containing the flags associated with the pmc (such as
348 C<extends> and C<provides>).
350 =item *
352 the list of parents this pmc extends
354 =item *
356 the body of the pmc
358 =item *
360 the code found after the pmc body
362 =item *
364 number of newlines in the pmc signature that need to be added to the
365 running total of lines in the file
367 =back
369 B<Comments:> Called internally by C<parse_pmc()>.
371 =cut
373 sub parse_top_level {
374 my $code = shift;
376 my $top_level_re = qr{
377 ^ # beginning of line
378 (.*?) # preamble
382 pmclass # pmclass keyword
383 \s+ # whitespace
384 ([\w]*) # pmc name
385 ((?:\s+\w+)*) # pmc attributes
386 \s* # whitespace
388 \{ # pmc body beginning marker
389 }smx;
390 $code =~ s[$top_level_re][{]smx or die "No pmclass found\n";
391 my ( $preamble, $pmc_signature, $pmcname, $attributes ) = ( $1, $2, $3, $4 );
393 my $chewed_lines = count_newlines($pmc_signature);
394 my ( $flags, $parents ) = parse_flags( $attributes, $pmcname );
395 my ( $body, $postamble ) = extract_balanced($code);
397 # trim out the { }
398 $body = strip_outer_brackets($body);
400 return ( $preamble, $pmcname, $flags, $parents, $body, $postamble, $chewed_lines );
403 our %has_value = map { $_ => 1 } qw(does group hll);
404 our %has_values = map { $_ => 1 } qw(provides extends maps lib);
406 =head2 C<parse_flags()>
408 my ($flags, $parents) = parse_flags($attributes, $pmcname);
410 B<Purpose:> Extract a pmc signature from the code ref.
412 B<Argument:> PMC file contents slurped by C<parse_pmc()>.
414 B<Return Values:> List of two elements:
416 =over 4
418 =item *
420 a hash ref containing the flags associated with the pmc (such as
421 C<extends> and C<provides>).
423 =item *
425 the list of parents this pmc extends
427 =back
429 B<Comments:> Called internally by C<parse_top_level()>.
431 =cut
433 sub parse_flags {
434 my ( $data, $pmcname ) = @_;
436 my ( $flags, @parents );
438 my @words = $data =~ /(\w+)/g;
440 while ( @words ) {
441 my $name = shift @words;
442 if ( $has_value{$name} || $has_values{$name} ) {
443 my $value = shift @words;
444 die "Parser error: no value for '$name'" unless $value;
446 if ( $name eq 'extends' ) {
447 push @parents, $value;
449 elsif ( $has_values{$name} ) {
450 $flags->{$name}{$value} = 1;
452 else {
453 $flags->{$name} = $value;
456 else {
457 $flags->{$name} = 1;
461 # setup some defaults
462 if ( $pmcname ne 'default' ) {
463 push @parents, 'default' unless @parents;
464 $flags->{provides}{scalar} = 1 unless $flags->{provides};
467 return ( $flags, \@parents );
470 =head2 C<extract_balanced()>
472 ($pmcbody, $post) = extract_balanced($code);
474 B<Purpose:> Remove a balanced C<{}> construct from the beginning of C<$code>.
475 Return it and the remaining code.
477 B<Argument:> The code ref which was the first argument provided to
478 C<parse_pmc()>.
480 B<Return Values:> List of two elements:
482 =over 4
484 =item *
486 String beginning with C<{> and ending with C<}>. In between is found C code
487 where the comments hold strings of Perl comments written in POD.
489 =item *
491 String holding the balance of the code. Same style as first element, but
492 without the braces.
494 =back
496 B<Comments:> Called twice within C<parse_pmc()>. Will die with error message
497 C<Badly balanced> if not balanced.
499 =cut
501 sub extract_balanced {
502 my $code = shift;
503 my $unbalanced = 0;
505 die "Unexpected whitespace, expecting" if $code =~ /^\s+/;
506 die "bad block open: ", substr( $code, 0, 40 ), "..." unless $code =~ /^\{/;
508 # create a copy and remove strings and comments so that
509 # unbalanced {} can be used in them in PMCs, being careful to
510 # preserve string length.
511 local $_ = $code;
513 ( ' (?: \\. | [^'] )* ' # remove ' strings
514 | " (?: \\. | [^"] )* " # remove " strings
515 | /\* .*? \*/ ) # remove C comments
517 [ "-" x length $1 ]sexg;
519 while (/ (\{) | (\}) /gx) {
520 if ($1) {
521 $unbalanced++;
523 else { # $2
524 $unbalanced--;
525 return ( substr( $code, 0, pos, "" ), $code ) if not $unbalanced;
529 die "Badly balanced PMC source\n" if $unbalanced;
530 return;
533 =head2 C<parse_method_attrs()>
535 $attrs = parse_method_attrs($method_attributes);
537 B<Purpose:> Parse a list of method attributes and return a hash ref of them.
539 B<Arguments:> String captured from regular expression.
541 B<Return Values:> Reference to hash of attribute values.
543 B<Comments:> Called within C<parse_pmc()>.
545 =cut
547 sub parse_method_attrs {
548 my $flags = shift;
550 my %result;
551 ++$result{$1} while $flags =~ /:(\w+)/g;
553 return \%result;
558 # Local Variables:
559 # mode: cperl
560 # cperl-indent-level: 4
561 # fill-column: 100
562 # End:
563 # vim: expandtab shiftwidth=4: