initial
[prop.git] / tools / scripts / grammar2latex
blob0ef04d04a3baae34fe43af18e5b7b0ce9f307c73
#!/usr/bin/perl
#@
#@ grammar2latex: Generate typesetted LaTeX grammar from Prop's report.
#@ usage: grammar2latex [options] <report-file>
#@ options:
#@ [-h] Print this help text
#@ [-S] Generate section only
#@ [-t] Emit output at stdout
#@ [-v] Verbose mode
#@ [-o<output-file>] Send output to a specified file
#@ [-D] Depth first presentation
#@ [-B] Breadth first presentation

# NOTE: the "#@ " lines above are read back at run time by
# print_help_and_die(); keep that prefix intact when editing them.

# Getopt::Std supersedes the Perl 4 "getopts.pl" library, which modern perls
# no longer ship.  Called without a hash reference, getopts() sets the same
# $opt_X package variables that the rest of this script reads.
use Getopt::Std;

$DEPTH_FIRST = 1;                     # -D is the default; -B turns it off
$BAD = getopts("hStvo:DB");           # despite the name: true on SUCCESS
$HELP = $opt_h;
$TO_STDOUT = $opt_t;
$SECTION_ONLY = $opt_S;
$VERBOSE = $opt_v;
$OUTPUT_FILE = $opt_o;
$DEPTH_FIRST = 0 if $opt_B;

# Exactly one report file is required.
print_help_and_die() if (!$BAD || $HELP || $#ARGV != 0);

###############################################################################
#
# Initialization.
#
###############################################################################
$GRAMMAR2LATEX_VERSION = "0.1";
$USER = $ENV{"USER"};
$DATE = `date`; chomp($DATE);         # chomp: strip the newline only
$DOCUMENT_STYLE = "\\documentclass{article}";
$BAR = "|";

# Perl never expands "~user" in file tests or require(), so resolve the home
# directory explicitly; otherwise the start-up file could never be found.
{
    my @pw_entry = defined $USER ? getpwnam($USER) : ();
    my $home_dir = @pw_entry ? $pw_entry[7] : $ENV{"HOME"};
    $STARTRC = defined $home_dir ? "$home_dir/.grammar2latex" : "";
}

$INPUT_FILE = $ARGV[0];
$QUOTED_INPUT_FILE = quote($INPUT_FILE);
$SECTION = "\\section";
$SUBSECTION = "\\subsection";
$OUTPUT_FILE = "${INPUT_FILE}.tex" if ! $OUTPUT_FILE;
$NON_TERMINAL_FONT = "\\nonterm";
$EPSILON = '\epsilon';
$LINES_PER_PAGE = 50;
$CHARACTERS_PER_LINE = 66;
$CHARACTERS_PER_PRODUCTION = 40;
$HTTP = quote('http://chimera.cs.nyu.edu:8888/~leunga');
$EMAIL = quote('leunga@valis.cs.nyu.edu');

# Single-quoted here-document: the backslashes must reach LaTeX untouched.
$LATEX_MARGINS = <<'END_MARGINS';
\setlength{\textwidth}{6.5in}
\setlength{\evensidemargin}{-.2in}
\setlength{\oddsidemargin}{-.2in}
\setlength{\textheight}{9.00in}
\setlength{\topmargin}{-0.5in}
\setlength{\parskip}{2mm}
\setlength{\parindent}{0mm}
END_MARGINS

# Drop any directory part so the output lands in the current directory.
if ($OUTPUT_FILE =~ m{^.*/([^/]+)$}) {
    $OUTPUT_FILE = $1;
}

# Optional per-user overrides of the settings above.
require "$STARTRC" if (-r $STARTRC);
###############################################################################
#
# Setup data structures.
#
###############################################################################

# Name of the syntax class; stays empty until the scanner sees the
# "[Syntax class ...]" line of the report.
$SYNTAX_CLASS  = "";

# Token classification tables (keys are token texts).
%terminals     = ();    # not referenced by the code visible in this file
%keywords      = ();    # quoted tokens for which is_symbol() is false
%symbols       = ();    # all other quoted tokens
%others        = ();    # unquoted tokens
%non_terminals = ();    # left-hand side => report line number (0 for <start>)

# Grammar storage.
%P             = ();    # left-hand side => array of token-list references
%Alts          = ();    # left-hand side => number of alternatives stored

# Dependency graph between left-hand sides and the tokens they mention.
%children      = ();    # lhs => { token => 1 } membership table
%child_list    = ();    # lhs => tokens in order of first appearance

# Filled in by compute_nonterm_index().
%non_term_no   = ();
###############################################################################
#
# Scan the report file
#
# Productions are collected into %P / %Alts, the dependency graph is built
# through build_graph(), and the summary statistics printed by the generator
# are remembered for print_diagnostics().
#
###############################################################################

# Three-argument open: the file name comes from the command line and must
# never be interpreted as an open mode or a pipe.
open(my $report_fh, '<', $INPUT_FILE) || die("$0: $!: $INPUT_FILE\n");
$line_no = 0;
while (<$report_fh>) {
    $line_no++;

    # The literal braces are escaped; an unescaped "{" in a pattern is
    # rejected or warned about by newer perls in some positions.
    if (/^\{\d+\}\s+(\<.*\>)\s+->\s+(.*)\s+/) {    # A special production
        $lhs = $1;
        $rhs = $2;
        # Only <start> is recorded; other special productions are skipped.
        if ($lhs eq '<start>') {
            $non_terminals{$lhs} = 0;
            my($toks) = split_rhs($rhs);
            $P{$lhs}[$Alts{$lhs}++] = $toks;
            build_graph("<start>", $toks);
        }
        next;
    }

    if (/^\{\d+\}\s+(.*)\s+->\s+(.*)\s+/) {        # A production
        $lhs = $1;
        $rhs = $2;
        $non_terminals{$lhs} = $line_no;
        my($toks) = split_rhs($rhs);
        $P{$lhs}[$Alts{$lhs}++] = $toks;
        build_graph($lhs, $toks);
        print STDERR '.' if $VERBOSE;
        next;
    }

    if (/^\{\d+\}\s+\|\s+(.*)\s+/) {               # continuing
        # Another alternative for the most recently seen left-hand side.
        $rhs = $1;
        my($toks) = split_rhs($rhs);
        $P{$lhs}[$Alts{$lhs}++] = $toks;
        build_graph($lhs, $toks);
        next;
    }

    $number_of_states = $1 if /^Number of states\s+=\s+(\d+)/;
    $number_of_items = $1 if /^Number of items\s+=\s+(\d+)/;
    $shift_reduce_conflicts = $1 if /^Number of shift\/reduce conflicts\s+=\s+(\d+)/;
    $reduce_reduce_conflicts = $1 if /^Number of reduce\/reduce conflicts\s+=\s+(\d+)/;
    $next_check_size = $1 if /^Max next\/check\s+=\s+(\d+)/;
    $SYNTAX_CLASS = $1 if /^\[Syntax class (\S+)\]/;
}
close($report_fh);
###############################################################################
#
# Check for consistency
#
###############################################################################

# A report without a "[Syntax class ...]" line cannot be typeset.
unless (length $SYNTAX_CLASS) {
    print STDERR "$0: no syntax class found in file '$INPUT_FILE'\n";
    exit(1);
}

# From here on the name is only used inside LaTeX text.
$SYNTAX_CLASS = quote($SYNTAX_CLASS);
###############################################################################
#
# Generate output
#
###############################################################################
if ($TO_STDOUT) {
    # Alias OUTPUT to STDOUT for the rest of the program.  A local() here
    # would be undone at the closing brace of this block, leaving OUTPUT
    # unopened and the -t option without any output.
    *OUTPUT = *STDOUT;
}
else {
    open(OUTPUT, '>', $OUTPUT_FILE) || die("$0: $!: $OUTPUT_FILE\n");
}

print STDERR '[Printing header]' if $VERBOSE;
print_header();
print STDERR '[Building keyword list]' if $VERBOSE;
print_keywords();
print STDERR '[Computing index]' if $VERBOSE;
compute_nonterm_index();
print STDERR '[Printing grammar]' if $VERBOSE;
print_grammar();
print_diagnostics();
print_footer();

if (!$TO_STDOUT) {
    # Buffered write errors only surface when the handle is closed.
    close(OUTPUT) || die("$0: $!: $OUTPUT_FILE\n");
    # Report the generated file name.  This is skipped with -t, where it
    # would end up inside the LaTeX stream written to stdout.
    print STDOUT "$OUTPUT_FILE\n";
}
exit(0);
###############################################################################
#
# Print help and die.
#
# The usage text is kept in the "#@ " comment lines of this script itself;
# they are echoed to STDERR and the program exits with status 1.
#
###############################################################################
sub print_help_and_die {
    # Three-argument open with a lexical handle: the script path is taken
    # literally and the handle cannot clash with other bareword handles.
    open(my $script_fh, '<', $0) || die("$!: $0\n");
    while (my $script_line = <$script_fh>) {
        print STDERR $1, "\n" if $script_line =~ /^#@ (.*)/;
    }
    close($script_fh);
    exit(1);
}
###############################################################################
#
# Quote LaTeX
#
# Takes a plain text string and returns a copy that is safe to place in
# LaTeX running text:
#   # $ % & _ { }   are prefixed with a backslash
#   ^               becomes \^{}  (a bare \^ is an accent command)
#   < and >         become $\langle$ and $\rangle$
#   ~               becomes {\char126}
# Characters that need no escaping in text, such as "@" and "!", pass
# through unchanged ("\@" and "\!" are spacing commands, not characters).
#
###############################################################################
sub quote {
    my($text) = @_;

    # Replacements that are not a simple backslash prefix.
    my %replacement_for = (
        '<' => '$\\langle$',
        '>' => '$\\rangle$',
        '~' => '{\\char126}',
        '^' => '\\^{}',
    );

    # One pass, so text inserted by a replacement is never escaped again.
    # The dollar sign is written as \$ because an unescaped "$@" inside a
    # pattern is interpolated as the eval-error variable.
    $text =~ s/([#\$%&_{}<>~^])/ exists $replacement_for{$1} ? $replacement_for{$1} : "\\$1" /ge;
    return $text;
}
###############################################################################
#
# Verbatim LaTeX
#
# Wraps $text in a \verb command, using the first of the delimiters
# | . ! @ that does not occur in the text.  Dies when all four occur.
#
###############################################################################
sub verbatim {
    my($text) = @_;
    for my $delimiter ('|', '.', '!', '@') {
        next if index($text, $delimiter) >= 0;
        # Plain concatenation on purpose: inside a double-quoted string
        # "@${text}" is parsed as an array dereference, which silently
        # dropped the text when "@" was the delimiter.
        return "\\verb" . $delimiter . $text . $delimiter;
    }
    die("$0: can't verbatimize: $text\n");
}
###############################################################################
#
# Make a LaTeX label
#
# Returns the argument with every underscore replaced by two hyphens.
#
###############################################################################
sub make_label {
    my $name = shift;
    $name =~ s{_}{--}g;
    return $name;
}
###############################################################################
#
# Check if a token is a symbol
#
# The token is passed including its opening double quote.  Returns 0 when
# the first character after the quote is a letter or underscore (a keyword),
# and 1 for everything else.
#
###############################################################################
sub is_symbol {
    my $token = shift;
    return $token =~ /^"[a-zA-Z_]/ ? 0 : 1;
}
###############################################################################
#
# Split string into tokens
#
# Takes the text of one right-hand side and returns a reference to the list
# of its tokens.  As a side effect the tokens are classified:
#   "..."     quoted token; recorded in %symbols or %keywords (see is_symbol)
#   '...'     quoted token; recorded in %symbols
#   ?         becomes the token <error>
#   $         becomes the token <EOF>
#   <digits>  dropped
#   other     kept as is and recorded in %others
#
###############################################################################
sub split_rhs {
    my($text) = @_;
    my(@tokens) = ();
    $text = "" unless defined $text;

    # The loop test used to be "! $text =~ /^\s*$/", which parses as
    # "(!$text) =~ ..." and so never looked at the text itself.  Together
    # with patterns that demanded whitespace AFTER every token, the loop
    # could not terminate when the last token ended the string or when the
    # text started with whitespace.
    while (1) {
        $text =~ s/^\s+//;
        last if $text eq "";

        # Escapes are tried before ordinary characters so that a
        # backslash-escaped quote stays inside the token.
        if ($text =~ /^("((?:\\.|[^"\\])*)")\s*(.*)$/) {
            my($token, $body, $rest) = ($1, $2, $3);
            push @tokens, $token;
            $text = $rest;
            if (is_symbol($token)) { $symbols{$body} = 1; }
            else                   { $keywords{$body} = 1; }
            next;
        }
        if ($text =~ /^('((?:\\.|[^'\\])*)')\s*(.*)$/) {
            my($token, $body, $rest) = ($1, $2, $3);
            push @tokens, $token;
            $text = $rest;
            $symbols{$body} = 1;
            next;
        }

        # The one-character markers must still stand alone: they are
        # followed by whitespace or by the end of the text.
        if ($text =~ /^\?(?:\s+|\z)(.*)$/) {
            push @tokens, "<error>"; $text = $1; next;
        }
        if ($text =~ /^\$(?:\s+|\z)(.*)$/) {
            push @tokens, "<EOF>"; $text = $1; next;
        }
        if ($text =~ /^\<\d+\>(?:\s+|\z)(.*)$/) {
            $text = $1; next;
        }
        if ($text =~ /^(\S+)\s*(.*)$/) {
            my($token, $rest) = ($1, $2);
            push @tokens, $token;
            $text = $rest;
            $others{$token} = 1;
            next;
        }

        # Not reachable for non-empty text, but guarantees termination.
        last;
    }
    return \@tokens;
}
###############################################################################
#
# Build the dependency graph
#
# $lhs is a left-hand side and $rhs a reference to a list of tokens.  Every
# token not yet known as a child of $lhs is appended to $child_list{$lhs}
# and marked in $children{$lhs}.
#
###############################################################################
sub build_graph {
    my($lhs, $rhs) = @_;
    foreach my $token (@{$rhs}) {
        next if $children{$lhs}{$token};      # already recorded
        push @{$child_list{$lhs}}, $token;
        $children{$lhs}{$token} = 1;
    }
}
###############################################################################
#
# Print header
#
# Writes the document preamble, title and author to OUTPUT.  Nothing is
# written when only a section was requested (-S).
#
###############################################################################
sub print_header {
    return if $SECTION_ONLY;

    # The user name goes into LaTeX text like every other interpolated
    # value, so it is quoted as well (e.g. an underscore in the name).
    my $quoted_user = quote($USER);

    print OUTPUT <<EOF;
$DOCUMENT_STYLE
$LATEX_MARGINS
\\title{\\Large \\bf Syntax class $SYNTAX_CLASS\\thanks{
Generated from the file ``\\tt ${QUOTED_INPUT_FILE}'' using
{\\em grammar2latex} version $GRAMMAR2LATEX_VERSION on $DATE.
{\\em grammar2latex} is part of the {\\sf Prop} tool set.
Please visit {\\tt $HTTP} for more details.
}}
\\author{User $quoted_user}
\\begin{document}
\\newfont{\\nonterm}{cmssq9}
\\maketitle
EOF
}
###############################################################################
#
# Print footer
#
# Closes the LaTeX document on OUTPUT unless only a section was requested.
#
###############################################################################
sub print_footer {
    return if $SECTION_ONLY;
    print OUTPUT "\\end{document}\n";
}
###############################################################################
#
# Print keywords
#
# Writes the "Lexical structure" section to OUTPUT: the sorted keywords,
# the sorted symbols, and those other tokens that are neither keywords,
# symbols nor non-terminals, each as a verbatim list.
#
###############################################################################

# Print the given words to OUTPUT, each preceded by a blank, starting a new
# line as soon as the current one reaches $CHARACTERS_PER_LINE characters.
sub _print_wrapped_words {
    my(@words) = @_;
    my $pending = "";
    foreach my $word (@words) {
        $pending .= " $word";
        next if length($pending) < $CHARACTERS_PER_LINE;
        print OUTPUT "$pending\n";
        $pending = "";
    }
    print OUTPUT "$pending\n" if $pending ne "";
}

sub print_keywords {
    print OUTPUT <<END;
${SECTION}\{Lexical structure}
\\begin{description}
\\item[keywords:]
\\begin{verbatim}
END
    _print_wrapped_words(sort keys %keywords);

    print OUTPUT <<END;
\\end{verbatim}
\\item[symbols:]
\\begin{verbatim}
END
    _print_wrapped_words(sort keys %symbols);

    print OUTPUT <<END;
\\end{verbatim}
\\item[others:]
\\begin{verbatim}
END
    # Leave out anything already listed above or defined by the grammar.
    _print_wrapped_words(
        grep { !$keywords{$_} && !$symbols{$_} && !$non_terminals{$_} }
        sort keys %others
    );

    print OUTPUT <<END;
\\end{verbatim}
\\end{description}
END
}
###############################################################################
#
# Print diagnostics
#
# Writes a section with the statistics collected while scanning the report
# (states, items, conflicts, table size) to OUTPUT.
#
###############################################################################
sub print_diagnostics {
    # The section title used to be misspelled "Diagonsistics".
    print OUTPUT <<EOF;
$SECTION\{Diagnostics}
\\begin{quotation}
\\begin{tabular}{ll}
\\bf Number of states & $number_of_states \\\\
\\bf Number of items & $number_of_items \\\\
\\bf Shift/reduce conflicts & $shift_reduce_conflicts \\\\
\\bf Reduce/reduce conflicts & $reduce_reduce_conflicts \\\\
\\bf Next/check table size & $next_check_size \\\\
\\end{tabular}
\\end{quotation}
EOF
}
###############################################################################
#
# Determine the non-terminal numbers
#
# Walks the dependency graph from <start> and stores in %non_term_no the
# position (counting from 1) at which each left-hand side is visited.
# The walk is depth first when $DEPTH_FIRST is set and breadth first
# otherwise, the same order in which print_grammar() lists the rules.
#
###############################################################################
sub compute_nonterm_index {
    my (@ready)     = ("<start>");
    my (%processed) = ();
    my ($index)     = 1;

    while (@ready) {
        # Stack for depth first, queue for breadth first.
        my $lhs = $DEPTH_FIRST ? pop @ready : shift @ready;
        print STDERR "+" if $VERBOSE;
        $non_term_no{$lhs} = $index++;

        # Children were only scheduled for left-hand sides that have at
        # least one alternative.  One pass over the child list is enough;
        # repeating it per alternative found nothing new.
        next unless $Alts{$lhs};

        my @candidates = @{ $child_list{$lhs} || [] };
        # Reversed so that the first child is the next one popped.
        @candidates = reverse @candidates if $DEPTH_FIRST;

        for my $child (@candidates) {
            next unless $non_terminals{$child} && !$processed{$child};
            push @ready, $child;
            $processed{$child} = 1;
        }
    }
}
###############################################################################
#
# Print grammar
#
# Writes the "Grammar" section to OUTPUT as a LaTeX array.  Rules are listed
# starting from <start>, depth first or breadth first depending on
# $DEPTH_FIRST.  Each rule shows its number from %non_term_no, its name and
# one row per alternative; long alternatives are continued on extra rows.
# A new page is started when the next rule would not fit on the current one.
#
###############################################################################
sub print_grammar {
    print OUTPUT <<END;
\\newpage
$SECTION\{Grammar}
\\begin{quotation}
\\begin{math}
\\begin{array}{llcl}
END

    my ($lines)     = 0;               # array rows written on this page
    my (@ready)     = ("<start>");
    my (%processed) = ();

    while (@ready) {
        # Stack for depth first, queue for breadth first.
        my $lhs = $DEPTH_FIRST ? pop @ready : shift @ready;
        print STDERR "[$lhs]" if $VERBOSE;
        my $n = $Alts{$lhs} || 0;

        # Start a new page when this rule does not fit any more.  The check
        # is skipped on a page that is still empty: a rule longer than a
        # whole page would otherwise leave an empty array behind.
        if ($lines > 0 && $lines + $n > $LINES_PER_PAGE) {
            $lines = 0;
            print OUTPUT <<END;
\\end{array}
\\end{math}
\\end{quotation}
\\newpage
\\begin{quotation}
\\begin{math}
\\begin{array}{llcl}
END
        }

        for my $i (0 .. $n - 1) {
            if ($i == 0) {
                # First alternative: rule number, rule name, "::=".
                # print is used instead of printf so that configurable
                # strings are never interpreted as format strings.
                my $lhs_label = $non_term_no{$lhs};
                print OUTPUT "\\langle{\\sf $lhs_label}\\rangle & ";
                print OUTPUT "\\mbox{${NON_TERMINAL_FONT} ", quote($lhs), "} ";
                print OUTPUT "& ::= &";
            }
            else {
                print OUTPUT "& & | &";
            }

            my @rhs = @{ $P{$lhs}[$i] };
            print OUTPUT $EPSILON if !@rhs;    # empty alternative

            # $line only measures the width of the current row.
            my $line = "";
            for my $tok (@rhs) {
                $line = "$line $tok";
                if (length($line) >= $CHARACTERS_PER_PRODUCTION) {
                    $line = "";
                    print OUTPUT "\\\\\n& & &\\quad ";
                    $lines++;
                }
                if (!$non_terminals{$tok}) {
                    print OUTPUT verbatim($tok);
                }
                else {
                    # Non-terminals carry their rule number as a subscript.
                    print OUTPUT "\\mbox{${NON_TERMINAL_FONT} ", quote($tok), "}";
                    print OUTPUT "_{\\sf $non_term_no{$tok}} ";
                }
                print OUTPUT "\\ ";
            }
            print OUTPUT "\\\\\n";
            $lines++;
        }

        # Schedule the non-terminals used by this rule.
        my @candidates = @{ $child_list{$lhs} || [] };
        # Reversed so that the first child is the next one popped.
        @candidates = reverse @candidates if $DEPTH_FIRST;
        for my $child (@candidates) {
            next unless $non_terminals{$child} && !$processed{$child};
            push @ready, $child;
            $processed{$child} = 1;
        }

        # Empty row between rules.
        print OUTPUT "\\\\\n";
        $lines++;
    }

    print OUTPUT <<END;
\\end{array}
\\end{math}
\\end{quotation}
END
}