Updated release tag for trunk changes 4328 to 4330.
[docutils.git] / src / prest.prl
blob2b621711851ff76eabca54d023e7322fd4741311
1 #!/usr/local/bin/perl -w
3 # $Id$
5 =pod
6 =begin reST
7 =begin Id
8 Id: ${TOOL_ID}
9 Copyright (C) 2002-2005 Freescale Semiconductor
10 Distributed under terms of the GNU General Public License (GPL).
11 =end Id
13 =begin Description
14 Description of ${TOOL_NAME}
15 ===========================
16 This program converts the DocUtils reStructuredText or
17 Document Object Model (DOM) (aka pseudo-XML) formats into an output
18 format. The default output format is HTML, but different formats can
19 be specified by using different writer schemas.
21 =end Description
22 =begin Usage
23 Usage: ${TOOL_NAME} [options] file(s)
25 Options:
26 -d Print debugging info on STDERR. May be used multiple
27 times to get more information.
28 -h Print full usage help
29 -w <writer> Process the writer schema from <writer>.wrt (default 'html')
30 -D var[=val] Define a variable that affects parsing (may be multiple)
31 -W var[=val] Define a variable that affects a writer (may be multiple)
32 -V Print version info
34 Available writers: ${\WriterList()}.
35 =end Usage
36 =end reST
37 =cut
39 # See comments in DOM.pm for DOM structure.
41 # Data structures:
42 # _`Handler`: Hash reference with the following
43 # keys:
44 # ``tag``: Regular expression for tag matching
45 # ``line``: Line number where function is defined
46 # ``text``: Textual representation of the code to run on tag match
47 # ``code``: Code reference for the code to run on tag match.
48 # The code is a subroutine with two arguments:
50 # the matching DOM object
52 # the string returned recursively from the content
53 # of this DOM.
55 # It needs to return a string. Any string returned by the
56 # top level is printed to STDOUT.
57 # _`Handler array`: Reference to array of handler objects.
59 # Global variables:
60 # ``$main::TOP_FILE``: Name of the top-level file being processed.
61 # ``%main::HANDLER``: Hash whose keys are process phases and whose
62 # values are references to handler arrays.
63 # ``%main::PHASE``: The writer phase currently being processed.
64 # ``@main::PHASES``: Order in which process phases are evaluated.
65 # ``$main::opt_d``: Debug mode
66 # ``$main::opt_w``: The writer schema to be used.
67 # ``%main::opt_D``: Hash whose keys are names of variables whose
68 # defines are specified on the command line
69 # with -D and whose values are the associated
70 # value (or 1 if no value is supplied)
71 # ``%main::opt_W``: Hash whose keys are names of variables whose
72 # defines are specified on the command line
73 # with -W and whose values are the associated
74 # value (or 1 if no value is supplied)
75 # ``$main::MY_DIR``: The real directory in which the prest script lives
76 # ``$main::TOOL_ID``: The tool name and release number
77 # ``$main::VERSION``: The prest version
79 use strict;
81 use vars qw($opt_V $opt_h $opt_d $opt_w %opt_W %opt_D);
82 use vars qw($TOOL_NAME $YEAR $TOP_FILE $SVNID $SVNNAME $VERSION
83 $TOOL_ID $MY_DIR);
84 use vars qw(%HANDLER @PHASES $PHASE);
86 main();
88 BEGIN {
89 $SVNID = '$Id$ ';
90 $SVNNAME = '$URL$ ';
91 $VERSION = ($SVNNAME =~ s|.*/tags/||) ?
92 join('.', map ($_+0, $SVNNAME =~ /(\d+)/g)) : '-unreleased-';
93 $SVNID =~ /Id: (\S+?) \S+ (\d+)/;
94 $TOOL_ID = "$1 release $VERSION";
95 $YEAR = $2;
96 ($TOOL_NAME = $1) =~ s/\..*//;
97 use FindBin;
98 $MY_DIR = $1 if $FindBin::RealBin =~ m|^(/.*)$|;
99 push @INC, $MY_DIR;
102 # The main entry point. Parses command-line options, preprocesses the
103 # writer schema, causes the document(s) to be read, and calls the writer.
104 sub main {
105 use Getopt::Long;
106 # Set default option values
107 $opt_w = "html";
108 $opt_d = 0;
110 # Parse options
111 Getopt::Long::config('no_ignore_case');
112 Usage() unless GetOptions qw(d+ h w=s D:s% W:s% V);
113 # Give usage information
114 Usage('Description') if $opt_h;
115 Usage('Id') if $opt_V;
116 Usage() unless @ARGV;
118 # Set default of 1 for unspecified -W options
119 foreach (keys %opt_W) {
120 $opt_W{$_} = 1 if defined $opt_W{$_} && $opt_W{$_} eq '';
122 # Initialize defined variables
123 use PrestConfig;
124 foreach my $key (keys %opt_W) {
125 (my $var = $key) =~ tr/a-zA-Z/_/c;
126 no strict 'refs';
127 ${"Eval_::$var"} = $opt_W{$key};
129 # Process -D variables
130 my %report_levels = (info=>1, warning=>2, error=>3, severe=>4, none=>5);
131 $opt_D{report} = do {local $^W=0; # Temporarily shut off warnings
132 main::FirstDefined($report_levels{$opt_D{report}},
133 $opt_D{report})} ;
135 ParseSchema($opt_w);
137 # Precompile bare "subroutines"
138 foreach my $handler (@{$HANDLER{''}}) {
139 # Need to untaint the text for the subroutine.
140 my $text = $1 if ($handler->{text} || '') =~ /(.*)/s;
141 DoEval($text, $handler->{line}, $handler->{tag});
143 # Precompile the handler routines
144 my $phase;
145 foreach $phase (keys %HANDLER) {
146 my $handler;
147 foreach $handler (@{$HANDLER{$phase}}) {
148 # Need to untaint the text for the subroutine.
149 my $text = $1 if ($handler->{text} || '') =~ /(.*)/s;
150 $handler->{code} = DoEval($text, $handler->{line});
154 my $first_line = <>;
155 my $DOM;
156 $first_line = "" if ! defined $first_line;
157 my $eof = eof;
158 # Handle all the documents
159 while (defined $first_line) {
160 $TOP_FILE = $ARGV;
161 if ($first_line =~ /^<document/) {
162 # We have a DOM for input, rather than an rst file
163 ($DOM, $first_line) = ParseDOM($first_line);
165 else {
166 use RST;
167 ($DOM, $first_line, $eof) = RST::Parse($first_line, $eof);
169 # Now compute the output string
170 use DOM;
171 my $str = ProcessDOM($DOM);
172 print $str if defined $str;
176 # Precompiles a subroutine that evaluates an expression.
177 # Arguments: string expression, line number, optional subroutine name
178 # Returns: anonymous subroutine reference
179 # Exceptions: Program termination if error in evaluation
180 # Uses globals: None
181 # Sets globals: ``Eval_::<subname>``
182 sub DoEval {
183 my ($str, $line, $subname) = @_;
184 my ($file, $lineno) = $line =~ /(.*), line (\d+)/;
185 print STDERR "$line\n" if $opt_d >= 1;
186 $subname = $line unless $subname;
187 $subname =~ s/\W/_/g;
188 my $sub = "sub Eval_::$subname {package Eval_; $str}";
189 my $val = eval(qq(\# @{[$lineno+1]} "$file"\n$sub));
190 die "Error: $line: $@" if $@;
191 return \&{$Eval_::{$subname}};
194 # Returns the first defined value in an array.
195 # Argument: array
196 # Returns: value
197 sub FirstDefined {
198 foreach (@_) {
199 return $_ if defined $_;
201 return;
204 # Parses a file in the DOM (pseudo-XML) format.
205 # Arguments: First line of file
206 # Returns: DOM object
207 # Uses globals: <> file handle
208 sub ParseDOM {
209 my ($first_line) = @_;
210 my $last_indent = -1;
211 my @stack;
212 my @indents;
213 my $tos; # top of stack
214 my $main;
215 $_ = $first_line;
216 goto parse;
217 while (<>) {
218 parse:
219 /(\s*).*/;
220 my $spaces = $1;
221 my $indent = length($spaces);
222 if (@stack > 0) {
223 my $i;
224 for ($i=0; $i < @indents; $i++) {
225 last if $indent <= $indents[$i]+1;
227 splice(@stack, $i);
228 splice(@indents, $i);
229 $tos = $stack[-1];
232 if (/^(\s*)<(\w+)\s*([^>]*)>\s*$/) {
233 my ($spaces, $tag, $attrlist) = ($1, $2, $3);
234 my $entity = { tag=>$tag, parent=>$tos,
235 text=>substr($_,$indent) };
236 while ($attrlist ne '') {
237 if ($attrlist =~ s/^([\w:]+)="([^\"]*)"\s*// ||
238 $attrlist =~ s/^([\w:]+)='([^\"]*)'\s*//) {
239 $entity->{attr}{$1} = $2;
241 elsif ($attrlist =~ s/^(\w+)\s*//) {
242 $entity->{attr}{$1} = undef;
244 else {
245 goto pcdata;
248 if (@stack > 0) {
249 push @{$stack[-1]->{content}}, $entity;
250 $tos = $entity;
252 else {
253 $main = $entity;
255 push (@stack, $entity);
256 push (@indents, $indent);
257 $tos = $entity;
258 $tos->{content} = [];
260 else {
261 pcdata:
262 substr($_,0,$indents[-1]+4) = "";
263 chomp;
264 my $text = $_;
265 my $ncontent = @{$tos->{content}};
266 if ($ncontent > 0 &&
267 $tos->{content}[$ncontent-1]{tag} eq '#PCDATA') {
268 $tos->{content}[$ncontent-1]{text} .= "$text\n";
270 else {
271 my $entity = { tag=>'#PCDATA', text=>"$text\n" };
272 push(@{$tos->{content}}, $entity);
277 #use PrintVar;PrintVar::PrintVar($main);print "\n";
278 $main->{attr}{source} = $ARGV;
279 return $main;
282 # Parses the writer's schema file.
283 # Arguments: file name
284 # Returns: None
285 # Modifies globals: %HANDLER
286 sub ParseSchema {
287 my ($writer) = @_;
289 my $file = $writer;
290 use vars qw($newfile);
291 local $newfile = $file;
292 my @path = (".", $MY_DIR);
293 my @dirs = grep(-r "$_/$file.wrt", @path);
294 die "Cannot find schema for writer $writer" unless @dirs;
295 $file = "$dirs[0]/$file.wrt" if defined $dirs[0];
296 no strict 'refs';
297 open $newfile,$file or die "Cannot open file $file";
299 my %phases;
300 my $phase = '';
301 my $nest = my $in_sub = 0;
302 # Note: Turn warnings off while reading from newfile since it will
303 # cause a "read of closed filehandle" warning with -w.
304 while (do { local $^W=0; $_ = <$newfile> }) {
305 next unless defined $_;
306 # Make sure $. is relative to the current file
307 close $newfile if eof;
308 if ($nest <= 1 && ! $in_sub) {
309 next if /^=pod/ .. /^=cut/;
310 next if /^\s*$/ || /^\s*\#/;
311 if (/^\s*(?:(phase|sub)\s+)?(\S+)\s*(=\s*)?\{\s*(?:\#.*)?$/i) {
312 if ($nest == 0 && $1 eq 'phase') {
313 $phase = $2;
314 push @PHASES, $phase unless $phases{$phase}++;
316 else {
317 my $tag = $2;
318 push(@{$HANDLER{$phase}},
319 {tag=>$tag, line=>"$newfile.wrt, line $."});
320 $in_sub = $nest+1;
322 $nest++;
324 elsif (/^\s*\}\s*$/) {
325 $nest--;
327 else {
328 die "$file:$.: Parse error: $_";
331 else {
332 my $left = y/\{/\{/;
333 my $right = y/\}/\}/;
334 $nest += ($left - $right);
335 $HANDLER{$phase}[-1]{text} .= $_ if $nest >= $in_sub;
336 $in_sub = 0 if $nest < $in_sub;
339 close $newfile;
342 # Passes the DOM through all phases of the writer and returns the
343 # output string.
344 # Arguments: parsed DOM
345 # Returns: string
346 sub ProcessDOM {
347 my ($dom) = @_;
348 my $str = '';
349 foreach $PHASE (@PHASES) {
350 $str .= ProcessDOMPhase($dom);
352 return $str;
355 # Passes the DOM through a specific phase of the writer and returns
356 # the output string. Uses the current phase if no phase is specified.
357 # Arguments: parsed DOM, optional phase name
358 # Returns: string returned from processing the phase
359 sub ProcessDOMPhase {
360 my ($dom, $phase) = @_;
361 $phase = $PHASE unless defined $phase;
362 my $str = TraverseDOM($dom, $HANDLER{$phase});
363 return defined $str ? $str : '';
366 # Internal routine to traverse a parsed document object model (DOM)
367 # object and applies all the handler routines to their tags.
368 # Arguments: parsed DOM, ref to array of handler hash references.
369 sub TraverseDOM {
370 my ($dom, $handarray) = @_;
371 my $searchstring = "^(?:" . join('|',map("($_->{tag})",@$handarray)) .
372 ')$';
373 TraverseDOM_($dom, $handarray, $searchstring);
376 # Internal routine called by TraverseDOM to do recursive handling of DOM tree.
377 # Arguments: parsed DOM, handler array reference, search string
378 sub TraverseDOM_ {
379 my ($dom, $handarray, $searchstring) = @_;
380 my @matches = $dom->{tag} =~ /$searchstring/;
381 my @match = grep(defined $matches[$_], (0 .. $#{$handarray}));
382 my $match = $match[0];
383 my $str;
384 if (! defined $match || $match <= $#{$handarray}) {
385 my $content;
386 foreach $content (@{$dom->{content}}) {
387 my $val = TraverseDOM_($content, $handarray, $searchstring);
388 $content->{val} = $val;
390 my $substr = join('',map(defined $_->{val} ? $_->{val} : '',
391 @{$dom->{content}}));
392 if (defined $match) {
393 print STDERR "$PHASE: $dom->{tag}\n" if $opt_d >= 1;
394 $str = eval { &{$handarray->[$match]{code}}($dom, $substr) };
395 print STDERR "$str\n"
396 if $opt_d >= 2 && defined $str && $str ne '';
397 die "Error: $handarray->[$match]{line}: $@" if $@;
400 return $str;
403 # Gets list of writers
404 # Arguments: none
405 # Returns: list of writers
406 sub WriterList {
407 my ($dir,@writers);
408 foreach $dir (@INC) {
409 push(@writers, glob("$dir/*.wrt"));
411 grep(s|.*/([^/]+)\.wrt$|$1|, @writers);
412 return join(', ', @writers);
415 # Extracts and prints usage information
416 # Arguments: type of usage, end marker for usage (optional)
417 sub Usage {
418 my ($what) = @_;
419 $what = "Usage" if ! $what;
420 my $mark = $what eq 'Description' ? "($what|Usage)" : $what;
421 if (open(ME,$0) == 1) {
422 while (<ME>) {
423 if ((/^=begin $mark/ .. /^=end $mark/) &&
424 ! /^=(begin|end) $mark/) {
425 s/(\$\{[^\}]+\})/eval($1)/ge;
426 print;
429 close(ME);
431 if ($what =~ /Description/) {
432 my @used = qw(RST Transforms);
433 my %used;
434 @used{@used} = (1) x @used;
435 my $use;
436 foreach $use (@used) {
437 my @rst_dir = grep (-r "$_/$use.pm", @INC);
438 if (@rst_dir) {
439 my $newline_done;
440 my $file = "$rst_dir[0]/$use.pm";
441 open(USE, $file) or die "Cannot open $file";
442 while (<USE>) {
443 print "\n" unless $newline_done++;
444 if ((/^=begin $mark/ .. /^=end $mark/) &&
445 ! /^=(begin|end) $mark/) {
446 s/(\$\{[^\}]+\}+)/eval $1/ge;
447 print;
450 close USE;
453 my (@directives, %directives);
454 my $dir;
455 foreach $dir (@INC) {
456 grep(m|([^/]+)$| && ($directives{$1} = $_),
457 glob "$dir/Directive/*.pm");
459 @directives = map($directives{$_}, sort keys %directives);
460 my $directive;
461 foreach $directive (@directives) {
462 $directive =~ m|([^/]+)\.pm|;
463 my $fname = $1;
464 next if $used{$fname} || ! -r $directive;
465 my $output = 0;
466 open(DIRECTIVE, $directive) or die "Cannot open $directive";
467 while (<DIRECTIVE>) {
468 if ((/^=begin $mark/ .. /^=end $mark/) &&
469 ! /^=(begin|end) $mark/) {
470 if (! $output++) {
471 my $title = "Documentation for plug-in directive '$fname'";
472 print "\n$title\n",('-' x length($title)),"\n";
474 s/(\$\{[^\}]+\})/eval $1/ge;
475 print;
478 close DIRECTIVE;
481 my @writers;
482 foreach $dir (@INC) {
483 push(@writers, glob("$dir/*.wrt"));
485 my $writer;
486 foreach $writer (@writers) {
487 my $output = 0;
488 open(WRITER, $writer) or die "Cannot open $writer";
489 while (<WRITER>) {
490 if ((/^=begin $mark/ .. /^=end $mark/) &&
491 ! /^=(begin|end) $mark/) {
492 if (! $output++) {
493 $writer =~ m|([^/]+)\.wrt$|;
494 my $title = "Documentation for writer '$1'";
495 print "\n$title\n",('-' x length($title)),"\n";
497 s/(\$\{[^\}]+\})/eval $1/ge;
498 print;
501 close WRITER;
505 else {
506 print STDERR "Usage not available.\n";
508 exit (1);