tools/scripts/grammar2latex

   1 #!/usr/bin/perl
   2 #@
   3 #@ grammar2latex:  Generate typesetted LaTeX grammar from Prop's report.
   4 #@ usage: grammar2latex [options] <report-file>
   5 #@     options:
   6 #@        [-S]              Generate section only
   7 #@        [-t]              Emit output at stdout
   8 #@        [-v]              Verbose mode
   9 #@        [-o<output-file>] Send output to a specified file
  10 #@        [-D]              Depth first presentation
  11 #@        [-B]              Breadth first presentation
  12
  13 require "getopts.pl";
  14
  15 $DEPTH_FIRST  = 1;
  16 $BAD = Getopts("hStvo:DB");
  17 $HELP         = $opt_h;
  18 $TO_STDOUT    = $opt_t;
  19 $SECTION_ONLY = $opt_S;
  20 $VERBOSE      = $opt_v;
  21 $OUTPUT_FILE  = $opt_o;
  22 $DEPTH_FIRST  = 0 if $opt_B;
  23
  24 print_help_and_die() if (!$BAD || $HELP || $#ARGV != 0);
  25
  26 ###############################################################################
  27 #
  28 #  Initialization.
  29 #
  30 ###############################################################################
  31 $GRAMMAR2LATEX_VERSION="0.1";
  32 $USER=$ENV{"USER"};
  33 $DATE=`date`; chop($DATE);
  34 $DOCUMENT_STYLE      = "\\documentclass{article}";
  35 $BAR                 = "|";
  36 $STARTRC             = "~$USER/.grammar2latex";
  37 $INPUT_FILE          = $ARGV[0];
  38 $QUOTED_INPUT_FILE   = quote($INPUT_FILE);
  39 $SECTION             = "\\section";
  40 $SUBSECTION          = "\\subsection";
  41 $OUTPUT_FILE         = "${INPUT_FILE}.tex" if ! $OUTPUT_FILE;
  42 $NON_TERMINAL_FONT   = "\\nonterm";
  43 $EPSILON             = '\epsilon';
  44 $LINES_PER_PAGE      = 50;
  45 $CHARACTERS_PER_LINE = 66;
  46 $CHARACTERS_PER_PRODUCTION = 40;
  47 $HTTP                = quote('http://chimera.cs.nyu.edu:8888/~leunga');
  48 $EMAIL               = quote('leunga@valis.cs.nyu.edu');
  49 $LATEX_MARGINS =
  50 '
  51    \setlength{\textwidth}{6.5in}
  52    \setlength{\evensidemargin}{-.2in}
  53    \setlength{\oddsidemargin}{-.2in}
  54    \setlength{\textheight}{9.00in}
  55    \setlength{\topmargin}{-0.5in}
  56    \setlength{\parskip}{2mm}
  57    \setlength{\parindent}{0mm}
  58 ';
  59 if ($OUTPUT_FILE =~ /^.*\/([^\/]+)$/)
  60 {  $OUTPUT_FILE = $1;
  61 }
  62 require "$STARTRC" if (-r $STARTRC);
  63
  64 ###############################################################################
  65 #
  66 #  Setup data structures.
  67 #
  68 ###############################################################################
  69 $SYNTAX_CLASS  = "";
  70 %terminals     = ();
  71 %symbols       = ();
  72 %keywords      = ();
  73 %others        = ();
  74 %non_terminals = ();
  75 %P             = ();
  76 %Alts          = ();
  77 %children      = ();
  78 %child_list    = ();
  79 %non_term_no   = ();
  80
  81 ###############################################################################
  82 #
  83 #  Scan output file
  84 #
  85 ###############################################################################
  86 open(INPUT,$INPUT_FILE) || die("$0: $!: $INPUT_FILE\n");
  87 $line_no = 0;
  88 while (<INPUT>)
  89 {  $line_no++;
  90    if (/^{\d+}\s+(\<.*\>)\s+->\s+(.*)\s+/)   # A special production
  91    {  $lhs = $1;
  92       $rhs = $2;
  93       if ($lhs eq '<start>')
  94       {  $non_terminals{$lhs} = 0;
  95          my($toks) = split_rhs($rhs);
  96          $P{$lhs}[$Alts{$lhs}++] = $toks;
  97          build_graph("<start>",$toks);
  98       }
  99       next;
 100    }
 101    if (/^{\d+}\s+(.*)\s+->\s+(.*)\s+/)   # A production
 102    {  $lhs = $1;
 103       $rhs = $2;
 104       $non_terminals{$lhs} = $line_no;
 105       my($toks) = split_rhs($rhs);
 106       $P{$lhs}[$Alts{$lhs}++] = $toks;
 107       build_graph($lhs,$toks);
 108       print STDERR '.' if $VERBOSE;
 109       next;
 110    }
 111    if (/^{\d+}\s+\|\s+(.*)\s+/)          # continuing
 112    {  $rhs = $1;
 113       my($toks) = split_rhs($rhs);
 114       $P{$lhs}[$Alts{$lhs}++] = $toks;
 115       build_graph($lhs,$toks);
 116       next;
 117    }
 118    $number_of_states        = $1 if /^Number of states\s+=\s+(\d+)/;
 119    $number_of_items         = $1 if /^Number of items\s+=\s+(\d+)/;
 120    $shift_reduce_conflicts  = $1 if /^Number of shift\/reduce conflicts\s+=\s+(\d+)/;
 121    $reduce_reduce_conflicts = $1 if /^Number of reduce\/reduce conflicts\s+=\s+(\d+)/;
 122    $next_check_size         = $1 if /^Max next\/check\s+=\s+(\d+)/;
 123    $SYNTAX_CLASS            = $1 if /^\[Syntax class (\S+)\]/;
 124 }
 125
 126 ###############################################################################
 127 #
 128 #  Check for consistency
 129 #
 130 ###############################################################################
 131 if ($SYNTAX_CLASS eq "")
 132 {  print STDERR "$0: no syntax class found in file '$INPUT_FILE'\n";
 133    exit(1);
 134 }
 135 $SYNTAX_CLASS = quote($SYNTAX_CLASS);
 136
 137 ###############################################################################
 138 #
 139 #  Generate output
 140 #
 141 ###############################################################################
 142 if ($TO_STDOUT)
 143 {  local(*OUTPUT) = *STDOUT; }
 144 else
 145 {  open(OUTPUT,">$OUTPUT_FILE") || die("$0: $!: $OUTPUT_FILE\n");
 146 }
 147
 148 print STDERR '[Printing header]' if $VERBOSE;
 149 print_header();
 150 print STDERR '[Building keyword list]' if $VERBOSE;
 151 print_keywords();
 152 print STDERR '[Computing index]' if $VERBOSE;
 153 compute_nonterm_index();
 154 print STDERR '[Printing grammar]' if $VERBOSE;
 155 print_grammar();
 156 print_diagnostics();
 157 print_footer();
 158 print STDOUT "$OUTPUT_FILE\n";
 159 exit(0);
 160
 161 ###############################################################################
 162 #
 163 #  Print help and die.
 164 #
 165 ###############################################################################
 166 sub print_help_and_die
 167 {  open (HELP,$0) || die("$!: $0\n");
 168    while (<HELP>)
 169    {  print STDERR $1, "\n" if /^#@ (.*)/;
 170    }
 171    exit(1);
 172 }
 173
 174 ###############################################################################
 175 #
 176 #  Quote LaTeX
 177 #
 178 ###############################################################################
 179 sub quote
 180 {  my($text) = @_;
 181    my($dollar) = '$';
 182    $text =~ s/([_%&^$@!{}])/\\\1/g;
 183    $text =~ s/\</${dollar}\\langle${dollar}/g;
 184    $text =~ s/\>/${dollar}\\rangle${dollar}/g;
 185    $text =~ s/~/{\\char126}/g;
 186    return $text;
 187 }
 188
 189 ###############################################################################
 190 #
 191 #  Verbatim LaTeX
 192 #
 193 ###############################################################################
 194 sub verbatim
 195 {  my($text) = @_;
 196    if (! ($text =~ /\|/))
 197    {  return "\\verb|${text}|"; }
 198    if (! ($text =~ /\./))
 199    {  return "\\verb.${text}."; }
 200    if (! ($text =~ /\!/))
 201    {  return "\\verb!${text}!"; }
 202    if (! ($text =~ /\@/))
 203    {  return "\\verb@${text}@"; }
 204    die ("$0: can't verbatimize: $text\n");
 205 }
 206
 207 ###############################################################################
 208 #
 209 #  Make a LaTeX label
 210 #
 211 ###############################################################################
 212 sub make_label
 213 {  my($label) = @_;
 214    $label =~ s/_/--/g;
 215    return $label;
 216 }
 217
 218 ###############################################################################
 219 #
 220 #  Check if a token is a symbol
 221 #
 222 ###############################################################################
 223 sub is_symbol
 224 {  my($tok) = @_;
 225    if ($tok =~ /^"[a-zA-Z_]/) { return 0; }
 226    else                       { return 1; }
 227 }
 228
 229 ###############################################################################
 230 #
 231 #  Split string into tokens
 232 #
 233 ###############################################################################
 234 sub split_rhs
 235 {  my($text) = @_;
 236    my(@tokens) = ();
 237    while (! $text =~ /^\s*$/)
 238    {  if ($text =~ /^("(([^"]|\\.)*)")\s*(.*)$/)
 239       {  push @tokens, $1; $text = $4;
 240          if (is_symbol($1)) { $symbols{$2} = 1; }
 241          else               { $keywords{$2} = 1; }
 242          next;
 243       }
 244       if ($text =~ /^('(([^']|\\.)*)')\s*(.*)$/)
 245       {  push @tokens, $1; $text = $4; $symbols{$2} = 1; next; }
 246       if ($text =~ /^\?\s+(.*)$/)
 247       {  push @tokens, "<error>"; $text = $1; next; }
 248       if ($text =~ /^\$\s+(.*)$/)
 249       {  push @tokens, "<EOF>"; $text = $1; next; }
 250       if ($text =~ /^\<\d+\>\s+(.*)$/)
 251       {  $text = $1; next; }
 252       if ($text =~ /^(\S+)\s+(.*)$/)
 253       {  push @tokens, $1; $text = $2; $others{$1} = 1; next; }
 254    }
 255    return \@tokens;
 256 }
 257
 258 ###############################################################################
 259 #
 260 #  Build the dependency graph
 261 #
 262 ###############################################################################
 263 sub build_graph
 264 {  my($lhs,$rhs) = @_;
 265    for $i (@{$rhs})
 266    {  if (! $children{$lhs}{$i})
 267       {  push @{$child_list{$lhs}}, $i;
 268          #print STDERR "{$lhs -> $i}";
 269          $children{$lhs}{$i} = 1;
 270       }
 271    }
 272 }
 273
 274 ###############################################################################
 275 #
 276 #  Print header
 277 #
 278 ###############################################################################
 279 sub print_header
 280 {
 281    if (! $SECTION_ONLY)
 282    {  print OUTPUT <<EOF;
 283 $DOCUMENT_STYLE
 284 $LATEX_MARGINS
 285 \\title{\\Large \\bf Syntax class $SYNTAX_CLASS\\thanks{
 286             Generated from the file ``\\tt $QUOTED_INPUT_FILE'' using
 287             {\\em grammar2latex} version $GRAMMAR2LATEX_VERSION on $DATE.
 288             {\\em grammar2latex} is part of the {\\sf Prop} tool set.
 289             Please visit {\\tt $HTTP} for more details.
 290          }
 291        }
 292 \\author{User $USER}
 293 \\begin{document}
 294    \\newfont{\\nonterm}{cmssq9}
 295    \\maketitle
 296 EOF
 297    }
 298 }
 299
 300 ###############################################################################
 301 #
 302 #  Print footer
 303 #
 304 ###############################################################################
 305 sub print_footer
 306 {
 307    if (! $SECTION_ONLY)
 308    {
 309        print OUTPUT <<EOF;
 310 \\end{document}
 311 EOF
 312    }
 313 }
 314
 315 ###############################################################################
 316 #
 317 #  Print keywords
 318 #
 319 ###############################################################################
 320 sub print_keywords
 321 {  print OUTPUT <<END;
 322 ${SECTION}\{Lexical structure}
 323 \\begin{description}
 324    \\item[keywords:]
 325       \\begin{verbatim}
 326 END
 327
 328    my($line,$k,$s) = "";
 329    for $k (sort keys %keywords)
 330    {  $line = "$line  $k";
 331       if (length($line) >= $CHARACTERS_PER_LINE)
 332       {  print OUTPUT "$line\n"; $line = ""; }
 333    }
 334    if ($line ne "") { print OUTPUT "$line\n"; }
 335    $line = "";
 336
 337    print OUTPUT <<END;
 338       \\end{verbatim}
 339    \\item[symbols:]
 340       \\begin{verbatim}
 341 END
 342
 343    for $s (sort keys %symbols)
 344    {  $line = "$line  $s";
 345       if (length($line) >= $CHARACTERS_PER_LINE)
 346       {  print OUTPUT "$line\n"; $line = ""; }
 347    }
 348    if ($line ne "") { print OUTPUT "$line\n"; }
 349    $line = "";
 350
 351    print OUTPUT <<END;
 352       \\end{verbatim}
 353    \\item[others:]
 354       \\begin{verbatim}
 355 END
 356
 357    for $k (sort keys %others)
 358    {  if (! $keywords{$k} && ! $symbols{$k} && ! $non_terminals{$k})
 359       {  $line = "$line  $k";
 360          if (length($line) >= $CHARACTERS_PER_LINE)
 361          {  print OUTPUT "$line\n"; $line = ""; }
 362       }
 363    }
 364    if ($line ne "") { print OUTPUT "$line\n"; }
 365    $line = "";
 366
 367    print OUTPUT <<END;
 368       \\end{verbatim}
 369    \\end{description}
 370 END
 371 }
 372
 373 ###############################################################################
 374 #
 375 #  Print diagnostics
 376 #
 377 ###############################################################################
 378 sub print_diagnostics
 379 {
 380    print OUTPUT <<EOF;
 381 $SECTION\{Diagonsistics}
 382    \\begin{quotation}
 383       \\begin{tabular}{ll}
 384          \\bf Number of states        & $number_of_states \\\\
 385          \\bf Number of items         & $number_of_items \\\\
 386          \\bf Shift/reduce conflicts  & $shift_reduce_conflicts \\\\
 387          \\bf Reduce/reduce conflicts & $reduce_reduce_conflicts \\\\
 388          \\bf Next/check table size   & $next_check_size \\\\
 389       \\end{tabular}
 390    \\end{quotation}
 391 EOF
 392 }
 393
 394 ###############################################################################
 395 #
 396 #  Topological sort to determine the non-terminal number
 397 #
 398 ###############################################################################
 399 sub compute_nonterm_index
 400 {
 401    my (@ready)     = ("<start>");
 402    my (%processed) = ();
 403    my ($index)     = 1;
 404    while ($#ready >= 0)
 405    {  if ($DEPTH_FIRST)
 406       {  $lhs = pop @ready;
 407       } else
 408       {  ($lhs,@rest) = @ready;
 409          @ready = @rest;
 410       }
 411       print STDERR "+" if $VERBOSE;
 412       $non_term_no{$lhs} = $index++;
 413       $n = $Alts{$lhs};
 414       for ($i = 0; $i < $n; $i++)
 415       {  if ($DEPTH_FIRST)
 416          {  # Depth first
 417             for $child (reverse @{$child_list{$lhs}})
 418             {  if ($non_terminals{$child} && ! $processed{$child})
 419                {  push @ready, $child;
 420                   $processed{$child} = 1;
 421                }
 422             }
 423          } else
 424          {  # Breadth first
 425             for $child (@{$child_list{$lhs}})
 426             {  if ($non_terminals{$child} && ! $processed{$child})
 427                {  push @ready, $child;
 428                   $processed{$child} = 1;
 429                }
 430             }
 431          }
 432       }
 433    }
 434 }
 435
 436 ###############################################################################
 437 #
 438 #  Print grammar
 439 #
 440 ###############################################################################
 441 sub print_grammar
 442 {
 443    print OUTPUT <<END;
 444 \\newpage
 445 $SECTION\{Grammar}
 446 \\begin{quotation}
 447 \\begin{math}
 448    \\begin{array}{llcl}
 449 END
 450
 451    my ($lines)     = 0;
 452    my (@ready)     = ("<start>");
 453    my (%processed) = ();
 454
 455    while ($#ready >= 0)
 456    {  if ($DEPTH_FIRST)
 457       {  $lhs = pop @ready;
 458       } else
 459       {  ($lhs,@rest) = @ready;
 460          @ready = @rest;
 461       }
 462       print STDERR "[$lhs]" if $VERBOSE;
 463       $n = $Alts{$lhs};
 464       if ($lines + $n > $LINES_PER_PAGE)
 465       {  $lines = 0;
 466          print OUTPUT <<END;
 467    \\end{array}
 468 \\end{math}
 469 \\end{quotation}
 470 \\newpage
 471 \\begin{quotation}
 472 \\begin{math}
 473    \\begin{array}{llcl}
 474 END
 475       }
 476       for ($i = 0; $i < $n; $i++)
 477       {  if ($i == 0)
 478          {  my($quoted_lhs) = quote($lhs);
 479             my($lhs_label)  = $non_term_no{$lhs};
 480             printf OUTPUT
 481                "\\langle{\\sf $lhs_label}\\rangle & ", $lhs_label;
 482             printf OUTPUT "\\mbox{${NON_TERMINAL_FONT} %s} ", $quoted_lhs;
 483             print OUTPUT "& ::= &";
 484          } else {
 485             printf OUTPUT "& & | &";
 486          }
 487          my(@rhs) = @{$P{$lhs}[$i]};
 488          print OUTPUT "$EPSILON" if ($#rhs == -1);
 489          my($line) = "";
 490          for ($j = 0; $j <= $#rhs; $j++)
 491          {  my($tok) = $rhs[$j];
 492             $line = "$line $tok";
 493             if (length($line) >= $CHARACTERS_PER_PRODUCTION)
 494             {  $line = "";
 495                printf OUTPUT "\\\\\n& & &\\quad ";
 496                $lines++;
 497             }
 498             if (! $non_terminals{$tok})
 499             {  printf OUTPUT "%s", verbatim($tok); }
 500             else
 501             {  my($quoted_tok) = quote($tok);
 502                printf OUTPUT "\\mbox{${NON_TERMINAL_FONT} %s}", $quoted_tok;
 503                if ($non_terminals{$tok})
 504                {  printf OUTPUT "_{\\sf %s} ", $non_term_no{$tok};
 505                }
 506             }
 507             printf OUTPUT "\\ ";
 508          }
 509          print OUTPUT "\\\\\n";
 510          $lines++;
 511
 512          if ($DEPTH_FIRST)
 513          {  # Depth first
 514             for $child (reverse @{$child_list{$lhs}})
 515             {  if ($non_terminals{$child} && ! $processed{$child})
 516                {  push @ready, $child;
 517                   $processed{$child} = 1;
 518                }
 519             }
 520          } else
 521          {
 522             # Breadth first
 523             for $child (@{$child_list{$lhs}})
 524             {  if ($non_terminals{$child} && ! $processed{$child})
 525                {  push @ready, $child;
 526                   $processed{$child} = 1;
 527                }
 528             }
 529          }
 530       }
 531       print OUTPUT "\\\\\n";
 532       $lines++;
 533    }
 534    print OUTPUT <<END;
 535    \\end{array}
 536 \\end{math}
 537 \\end{quotation}
 538 END
 539 }