doc/rdsrc.pl

   1 #!/usr/bin/perl
   2 ## --------------------------------------------------------------------------
   3 ##
   4 ##   Copyright 1996-2017 The NASM Authors - All Rights Reserved
   5 ##   See the file AUTHORS included with the NASM distribution for
   6 ##   the specific copyright holders.
   7 ##
   8 ##   Redistribution and use in source and binary forms, with or without
   9 ##   modification, are permitted provided that the following
  10 ##   conditions are met:
  11 ##
  12 ##   * Redistributions of source code must retain the above copyright
  13 ##     notice, this list of conditions and the following disclaimer.
  14 ##   * Redistributions in binary form must reproduce the above
  15 ##     copyright notice, this list of conditions and the following
  16 ##     disclaimer in the documentation and/or other materials provided
  17 ##     with the distribution.
  18 ##
  19 ##     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  20 ##     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  21 ##     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  22 ##     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 ##     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  24 ##     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25 ##     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  26 ##     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27 ##     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28 ##     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29 ##     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  30 ##     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  31 ##     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32 ##
  33 ## --------------------------------------------------------------------------
  34
  35
  36 # Read the source-form of the NASM manual and generate the various
  37 # output forms.
  38
  39 # TODO:
  40 #
  41 # Ellipsis support would be nice.
  42
  43 # Source-form features:
  44 # ---------------------
  45 #
  46 # Bullet \b
  47 #   Bullets the paragraph. Rest of paragraph is indented to cope. In
  48 #   HTML, consecutive groups of bulleted paragraphs become unordered
  49 #   lists.
  50 #
  51 # Indent \>
#   Indents the paragraph equivalently to a bulleted paragraph.  In HTML,
  53 #   an indented paragraph following a bulleted paragraph is included in the
  54 #   same list item.
  55 #
  56 # Blockquote \q
  57 #   Marks the paragraph as a block quote.
  58 #
  59 # Emphasis \e{foobar}
  60 #   produces `_foobar_' in text and italics in HTML, PS, RTF
  61 #
  62 # Inline code \c{foobar}
  63 #   produces ``foobar'' in text, and fixed-pitch font in HTML, PS, RTF
  64 #
  65 # Display code
  66 # \c  line one
  67 # \c   line two
  68 #   produces fixed-pitch font where appropriate, and doesn't break
  69 #   pages except sufficiently far into the middle of a display.
  70 #
  71 # Chapter, header and subheader
  72 # \C{intro} Introduction
  73 # \H{whatsnasm} What is NASM?
  74 # \S{free} NASM Is Free
  75 #   dealt with as appropriate. Chapters begin on new sides, possibly
  76 #   even new _pages_. (Sub)?headers are good places to begin new
  77 #   pages. Just _after_ a (sub)?header isn't.
  78 #   The keywords can be substituted with \K and \k.
  79 #
  80 # Keyword \K{cintro} \k{cintro}
  81 #   Expands to `Chapter 1', `Section 1.1', `Section 1.1.1'. \K has an
  82 #   initial capital whereas \k doesn't. In HTML, will produce
  83 #   hyperlinks.
  84 #
  85 # Web link \W{http://foobar/}{text} or \W{mailto:me@here}\c{me@here}
  86 #   the \W prefix is ignored except in HTML; in HTML the last part
  87 #   becomes a hyperlink to the first part.
  88 #
  89 # Literals \{ \} \\
  90 #   In case it's necessary, they expand to the real versions.
  91 #
  92 # Nonbreaking hyphen \-
  93 #   Need more be said?
  94 #
  95 # Source comment \#
  96 #   Causes everything after it on the line to be ignored by the
  97 #   source-form processor.
  98 #
  99 # Indexable word \i{foobar} (or \i\e{foobar} or \i\c{foobar}, equally)
 100 #   makes word appear in index, referenced to that point
 101 #   \i\c comes up in code style even in the index; \i\e doesn't come
 102 #   up in emphasised style.
 103 #
 104 # Indexable non-displayed word \I{foobar} or \I\c{foobar}
 105 #   just as \i{foobar} except that nothing is displayed for it
 106 #
 107 # Index rewrite
 108 # \IR{foobar} \c{foobar} operator, uses of
 109 #   tidies up the appearance in the index of something the \i or \I
 110 #   operator was applied to
 111 #
 112 # Index alias
 113 # \IA{foobar}{bazquux}
 114 #   aliases one index tag (as might be supplied to \i or \I) to
 115 #   another, so that \I{foobar} has the effect of \I{bazquux}, and
 116 #   \i{foobar} has the effect of \I{bazquux}foobar
 117 #
 118 # Metadata
 119 # \M{key}{something}
 120 #   defines document metadata, such as authorship, title and copyright;
 121 #   different output formats use this differently.
 122 #
 123 # Include subfile
# \& filename
 125 #  Includes filename. Recursion is allowed.
 126 #
 127
 128 use File::Spec;
 129
 130 @include_path = ();
 131 $out_path = File::Spec->curdir();
 132
 133 while ($ARGV[0] =~ /^-/) {
 134     my $opt = shift @ARGV;
 135     if ($opt eq '-d') {
 136         $diag = 1;
 137     } elsif ($opt =~ /^\-[Ii](.*)$/) {
 138         push(@include_path, $1);
 139     } elsif ($opt =~ /^\-[Oo](.*)$/) {
 140         $out_path = $1;
 141     }
 142 }
 143
 144 $out_format = shift(@ARGV);
 145 @files = @ARGV;
 146 @files = ('-') unless(scalar(@files));
 147
 148 $| = 1;
 149
 150 $tstruct_previtem = $node = "Top";
 151 $nodes = ($node);
 152 $tstruct_level{$tstruct_previtem} = 0;
 153 $tstruct_last[$tstruct_level{$tstruct_previtem}] = $tstruct_previtem;
 154 $MAXLEVEL = 10;  # really 3, but play safe ;-)
 155
 156 # Read the file; pass a paragraph at a time to the paragraph processor.
 157 print "Reading input...";
 158 $pname = "para000000";
 159 @pnames = @pflags = ();
 160 $para = undef;
 161 foreach $file (@files) {
 162   &include($file);
 163 }
 164 &got_para($para);
 165 print "done.\n";
 166
 167 # Now we've read in the entire document and we know what all the
 168 # heading keywords refer to. Go through and fix up the \k references.
 169 print "Fixing up cross-references...";
 170 &fixup_xrefs;
 171 print "done.\n";
 172
 173 # Sort the index tags, according to the slightly odd order I've decided on.
 174 print "Sorting index tags...";
 175 &indexsort;
 176 print "done.\n";
 177
 178 # Make output directory if necessary
 179 mkdir($out_path);
 180
 181 if ($diag) {
 182   print "Writing index-diagnostic file...";
 183   &indexdiag;
 184   print "done.\n";
 185 }
 186
 187 # OK. Write out the various output files.
 188 if ($out_format eq 'txt') {
 189     print "Producing text output: ";
 190     &write_txt;
 191     print "done.\n";
 192 } elsif ($out_format eq 'html') {
 193     print "Producing HTML output: ";
 194     &write_html;
 195     print "done.\n";
 196 } elsif ($out_format eq 'dip') {
 197     print "Producing Documentation Intermediate Paragraphs: ";
 198     &write_dip;
 199     print "done.\n";
 200 } else {
 201     die "$0: unknown output format: $out_format\n";
 202 }
 203
 204 sub untabify($) {
 205   my($s) = @_;
 206   my $o = '';
 207   my($c, $i, $p);
 208
 209   $p = 0;
 210   for ($i = 0; $i < length($s); $i++) {
 211     $c = substr($s, $i, 1);
 212     if ($c eq "\t") {
 213       do {
 214         $o .= ' ';
 215         $p++;
 216       } while ($p & 7);
 217     } else {
 218       $o .= $c;
 219       $p++;
 220     }
 221   }
 222   return $o;
 223 }
 224 sub read_line {
 225   local $_ = shift;
 226   $_ = &untabify($_);
 227   if (/\\& (\S+)/) {
 228      &include($1);
 229   } else {
 230      &get_para($_);
 231   }
 232 }
 233 sub get_para($_) {
 234   chomp;
 235   if (!/\S/ || /^\\(IA|IR|M)/) { # special case: \IA \IR \M imply new-paragraph
 236     &got_para($para);
 237     $para = undef;
 238   }
 239   if (/\S/) {
 240     s/(^|[^\\])\\#.*$/\1/; # strip comments
 241     $para .= " " . $_;
 242   }
 243 }
 244 sub include {
 245   my $name = shift;
 246   my $F;
 247
 248   if ($name eq '-') {
 249     open($F, '<-');             # stdin
 250   } else {
 251     my $found = 0;
 252     foreach my $idir ( File::Spec->curdir, @include_path ) {
 253         my $fpath = File::Spec->catfile($idir, $name);
 254       if (open($F, '<', $fpath)) {
 255         $found = 1;
 256         last;
 257       }
 258     }
 259     die "Cannot open $name: $!\n" unless ($found);
 260   }
 261   while (defined($_ = <$F>)) {
 262      &read_line($_);
 263   }
 264   close($F);
 265 }
 266 sub got_para {
 267   local ($_) = @_;
 268   my $pflags = "", $i, $w, $l, $t;
 269   return if !/\S/;
 270
 271   @$pname = ();
 272
 273   # Strip off _leading_ spaces, then determine type of paragraph.
 274   s/^\s*//;
 275   $irewrite = undef;
 276   if (/^\\c[^{]/) {
 277     # A code paragraph. The paragraph-array will contain the simple
 278     # strings which form each line of the paragraph.
 279     $pflags = "code";
 280     while (/^\\c (([^\\]|\\[^c])*)(.*)$/) {
 281       $l = $1;
 282       $_ = $3;
 283       $l =~ s/\\\{/\{/g;
 284       $l =~ s/\\\}/}/g;
 285       $l =~ s/\\\\/\\/g;
 286       push @$pname, $l;
 287     }
 288     $_ = ''; # suppress word-by-word code
 289   } elsif (/^\\C/) {
 290     # A chapter heading. Define the keyword and allocate a chapter
 291     # number.
 292     $cnum++;
 293     $hnum = 0;
 294     $snum = 0;
 295     $xref = "chapter-$cnum";
 296     $pflags = "chap $cnum :$xref";
 297     die "badly formatted chapter heading: $_\n" if !/^\\C\{([^\}]*)\}\s*(.*)$/;
 298     $refs{$1} = "chapter $cnum";
 299     $node = "Chapter $cnum";
 300     &add_item($node, 1);
 301     $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
 302     $xrefs{$1} = $xref;
 303     $_ = $2;
 304     # the standard word-by-word code will happen next
 305   } elsif (/^\\A/) {
 306     # An appendix heading. Define the keyword and allocate an appendix
 307     # letter.
 308     $cnum++;
 309     $cnum = 'A' if $cnum =~ /[0-9]+/;
 310     $hnum = 0;
 311     $snum = 0;
 312     $xref = "appendix-$cnum";
 313     $pflags = "appn $cnum :$xref";
 314     die "badly formatted appendix heading: $_\n" if !/^\\A\{([^\}]*)}\s*(.*)$/;
 315     $refs{$1} = "appendix $cnum";
 316     $node = "Appendix $cnum";
 317     &add_item($node, 1);
 318     $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
 319     $xrefs{$1} = $xref;
 320     $_ = $2;
 321     # the standard word-by-word code will happen next
 322   } elsif (/^\\H/) {
 323     # A major heading. Define the keyword and allocate a section number.
 324     $hnum++;
 325     $snum = 0;
 326     $xref = "section-$cnum.$hnum";
 327     $pflags = "head $cnum.$hnum :$xref";
 328     die "badly formatted heading: $_\n" if !/^\\[HP]{([^\}]*)}\s*(.*)$/;
 329     $refs{$1} = "section $cnum.$hnum";
 330     $node = "Section $cnum.$hnum";
 331     &add_item($node, 2);
 332     $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
 333     $xrefs{$1} = $xref;
 334     $_ = $2;
 335     # the standard word-by-word code will happen next
 336   } elsif (/^\\S/) {
 337     # A sub-heading. Define the keyword and allocate a section number.
 338     $snum++;
 339     $xref = "section-$cnum.$hnum.$snum";
 340     $pflags = "subh $cnum.$hnum.$snum :$xref";
 341     die "badly formatted subheading: $_\n" if !/^\\S\{([^\}]*)\}\s*(.*)$/;
 342     $refs{$1} = "section $cnum.$hnum.$snum";
 343     $node = "Section $cnum.$hnum.$snum";
 344     &add_item($node, 3);
 345     $xrefnodes{$node} = $xref; $nodexrefs{$xref} = $node;
 346     $xrefs{$1} = $xref;
 347     $_ = $2;
 348     # the standard word-by-word code will happen next
 349   } elsif (/^\\IR/) {
 350     # An index-rewrite.
 351     die "badly formatted index rewrite: $_\n" if !/^\\IR\{([^\}]*)\}\s*(.*)$/;
 352     $irewrite = $1;
 353     $_ = $2;
 354     # the standard word-by-word code will happen next
 355   } elsif (/^\\IA/) {
 356     # An index-alias.
 357     die "badly formatted index alias: $_\n" if !/^\\IA\{([^\}]*)}\{([^\}]*)\}\s*$/;
 358     $idxalias{$1} = $2;
 359     return; # avoid word-by-word code
 360   } elsif (/^\\M/) {
 361     # Metadata
 362     die "badly formed metadata: $_\n" if !/^\\M\{([^\}]*)}\{([^\}]*)\}\s*$/;
 363     $metadata{$1} = $2;
 364     return; # avoid word-by-word code
 365   } elsif (/^\\([b\>q])/) {
 366     # An indented paragraph of some sort. Strip off the initial \b and let the
 367       # word-by-word code take care of the rest.
 368       my %ipar = (
 369           'b' => 'bull',
 370           '>' => 'indt',
 371           'q' => 'bquo',
 372           );
 373     $pflags = $ipar{$1};
 374     s/^\\[b\>q]\s*//;
 375   } else {
 376     # A normal paragraph. Just set $pflags: the word-by-word code does
 377     # the rest.
 378     $pflags = "norm";
 379   }
 380
 381   # The word-by-word code: unless @$pname is already defined (which it
 382   # will be in the case of a code paragraph), split the paragraph up
 383   # into words and push each on @$pname.
 384   #
 385   # Each thing pushed on @$pname should have a two-character type
 386   # code followed by the text.
 387   #
 388   # Type codes are:
 389   # "n " for normal
 390   # "da" for an en dash
 391   # "dm" for an em desh
 392   # "es" for first emphasised word in emphasised bit
 393   # "e " for emphasised in mid-emphasised-bit
 394   # "ee" for last emphasised word in emphasised bit
 395   # "eo" for single (only) emphasised word
 396   # "c " for code
 397   # "k " for cross-ref
 398   # "kK" for capitalised cross-ref
 399   # "w " for Web link
 400   # "wc" for code-type Web link
 401   # "x " for beginning of resolved cross-ref; generates no visible output,
 402   #      and the text is the cross-reference code
 403   # "xe" for end of resolved cross-ref; text is same as for "x ".
 404   # "i " for point to be indexed: the text is the internal index into the
 405   #      index-items arrays
 406   # "sp" for space
 407   while (/\S/) {
 408     s/^\s*//, push @$pname, "sp" if /^\s/;
 409     $indexing = $qindex = 0;
 410     if (/^(\\[iI])?\\c/) {
 411       $qindex = 1 if $1 eq "\\I";
 412       $indexing = 1, s/^\\[iI]// if $1;
 413       s/^\\c//;
 414       die "badly formatted \\c: \\c$_\n" if !/\{(([^\\}]|\\.)*)\}(.*)$/;
 415       $w = $1;
 416       $_ = $3;
 417       $w =~ s/\\\{/\{/g;
 418       $w =~ s/\\\}/\}/g;
 419       $w =~ s/\\-/-/g;
 420       $w =~ s/\\\\/\\/g;
 421       (push @$pname,"i"),$lastp = $#$pname if $indexing;
 422       push @$pname,"c $w" if !$qindex;
 423       $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
 424     } elsif (/^\\[iIe]/) {
 425       /^(\\[iI])?(\\e)?/;
 426       $emph = 0;
 427       $qindex = 1 if $1 eq "\\I";
 428       $indexing = 1, $type = "\\i" if $1;
 429       $emph = 1, $type = "\\e" if $2;
 430       s/^(\\[iI])?(\\e?)//;
 431       die "badly formatted $type: $type$_\n" if !/\{(([^\\}]|\\.)*)\}(.*)$/;
 432       $w = $1;
 433       $_ = $3;
 434       $w =~ s/\\\{/\{/g;
 435       $w =~ s/\\\}/\}/g;
 436       $w =~ s/\\-/-/g;
 437       $w =~ s/\\\\/\\/g;
 438       $t = $emph ? "es" : "n ";
 439       @ientry = ();
 440       (push @$pname,"i"),$lastp = $#$pname if $indexing;
 441       foreach $i (split /\s+/,$w) {  # \e and \i can be multiple words
 442         push @$pname,"$t$i","sp" if !$qindex;
 443         ($ii=$i) =~ tr/A-Z/a-z/, push @ientry,"n $ii","sp" if $indexing;
 444         $t = $emph ? "e " : "n ";
 445       }
 446       $w =~ tr/A-Z/a-z/, pop @ientry if $indexing;
 447       $$pname[$lastp] = &addidx($node, $w, @ientry) if $indexing;
 448       pop @$pname if !$qindex; # remove final space
 449       if (substr($$pname[$#$pname],0,2) eq "es" && !$qindex) {
 450         substr($$pname[$#$pname],0,2) = "eo";
 451       } elsif ($emph && !$qindex) {
 452         substr($$pname[$#$pname],0,2) = "ee";
 453       }
 454     } elsif (/^\\[kK]/) {
 455       $t = "k ";
 456       $t = "kK" if /^\\K/;
 457       s/^\\[kK]//;
 458       die "badly formatted \\k: \\k$_\n" if !/\{([^\}]*)\}(.*)$/;
 459       $_ = $2;
 460       push @$pname,"$t$1";
 461     } elsif (/^\\W/) {
 462       s/^\\W//;
 463       die "badly formatted \\W: \\W$_\n"
 464           if !/\{([^\}]*)\}(\\i)?(\\c)?\{(([^\\}]|\\.)*)\}(.*)$/;
 465       $l = $1;
 466       $w = $4;
 467       $_ = $6;
 468       $t = "w ";
 469       $t = "wc" if $3 eq "\\c";
 470       $indexing = 1 if $2;
 471       $w =~ s/\\\{/\{/g;
 472       $w =~ s/\\\}/\}/g;
 473       $w =~ s/\\-/-/g;
 474       $w =~ s/\\\\/\\/g;
 475       (push @$pname,"i"),$lastp = $#$pname if $indexing;
 476       push @$pname,"$t<$l>$w";
 477       $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
 478     } else {
 479       die "what the hell? $_\n" if !/^(([^\s\\\-]|\\[\\{}\-])*-?)(.*)$/;
 480       die "painful death! $_\n" if !length $1;
 481       $w = $1;
 482       $_ = $3;
 483       $w =~ s/\\\{/\{/g;
 484       $w =~ s/\\\}/\}/g;
 485       $w =~ s/\\-/-/g;
 486       $w =~ s/\\\\/\\/g;
 487       if ($w eq '--') {
 488           push @$pname, 'dm';
 489       } elsif ($w eq '-') {
 490         push @$pname, 'da';
 491       } else {
 492         push @$pname,"n $w";
 493       }
 494     }
 495   }
 496   if ($irewrite ne undef) {
 497     &addidx(undef, $irewrite, @$pname);
 498     @$pname = ();
 499   } else {
 500     push @pnames, $pname;
 501     push @pflags, $pflags;
 502     $pname++;
 503   }
 504 }
 505
 506 sub addidx {
 507   my ($node, $text, @ientry) = @_;
 508   $text = $idxalias{$text} || $text;
 509   if ($node eq undef || !$idxmap{$text}) {
 510     @$ientry = @ientry;
 511     $idxmap{$text} = $ientry;
 512     $ientry++;
 513   }
 514   if ($node) {
 515     $idxnodes{$node,$text} = 1;
 516     return "i $text";
 517   }
 518 }
 519
 520 sub indexsort {
 521   my $iitem, $ientry, $i, $piitem, $pcval, $cval, $clrcval;
 522
 523   @itags = map { # get back the original data as the 1st elt of each list
 524              $_->[0]
 525            } sort { # compare auxiliary (non-first) elements of lists
 526              $a->[1] cmp $b->[1] ||
 527              $a->[2] cmp $b->[2] ||
 528              $a->[0] cmp $b->[0]
 529            } map { # transform array into list of 3-element lists
 530              my $ientry = $idxmap{$_};
 531              my $a = substr($$ientry[0],2);
 532              $a =~ tr/A-Za-z0-9//cd;
 533              [$_, uc($a), substr($$ientry[0],0,2)]
 534            } keys %idxmap;
 535
 536   # Having done that, check for comma-hood.
 537   $cval = 0;
 538   foreach $iitem (@itags) {
 539     $ientry = $idxmap{$iitem};
 540     $clrcval = 1;
 541     $pcval = $cval;
 542     FL:for ($i=0; $i <= $#$ientry; $i++) {
 543       if ($$ientry[$i] =~ /^(n .*,)(.*)/) {
 544         $$ientry[$i] = $1;
 545         splice @$ientry,$i+1,0,"n $2" if length $2;
 546         $commapos{$iitem} = $i+1;
 547         $cval = join("\002", @$ientry[0..$i]);
 548         $clrcval = 0;
 549         last FL;
 550       }
 551     }
 552     $cval = undef if $clrcval;
 553     $commanext{$iitem} = $commaafter{$piitem} = 1
 554       if $cval and ($cval eq $pcval);
 555     $piitem = $iitem;
 556   }
 557 }
 558
 559 sub indexdiag {
 560   my $iitem,$ientry,$w,$ww,$foo,$node;
 561   open INDEXDIAG, '>', File::Spec->catfile($out_path, 'index.diag');
 562   foreach $iitem (@itags) {
 563     $ientry = $idxmap{$iitem};
 564     print INDEXDIAG "<$iitem> ";
 565     foreach $w (@$ientry) {
 566       $ww = &word_txt($w);
 567       print INDEXDIAG $ww unless $ww eq "\001";
 568     }
 569     print INDEXDIAG ":";
 570     $foo = " ";
 571     foreach $node (@nodes) {
 572       (print INDEXDIAG $foo,$node), $foo = ", " if $idxnodes{$node,$iitem};
 573     }
 574     print INDEXDIAG "\n";
 575   }
 576   close INDEXDIAG;
 577 }
 578
 579 sub fixup_xrefs {
 580   my $pname, $p, $i, $j, $k, $caps, @repl;
 581
 582   for ($p=0; $p<=$#pnames; $p++) {
 583     next if $pflags[$p] eq "code";
 584     $pname = $pnames[$p];
 585     for ($i=$#$pname; $i >= 0; $i--) {
 586       if ($$pname[$i] =~ /^k/) {
 587         $k = $$pname[$i];
 588         $caps = ($k =~ /^kK/);
 589         $k = substr($k,2);
 590         $repl = $refs{$k};
 591         die "undefined keyword `$k'\n" unless $repl;
 592         substr($repl,0,1) =~ tr/a-z/A-Z/ if $caps;
 593         @repl = ();
 594         push @repl,"x $xrefs{$k}";
 595         foreach $j (split /\s+/,$repl) {
 596           push @repl,"n $j";
 597           push @repl,"sp";
 598         }
 599         pop @repl; # remove final space
 600         push @repl,"xe$xrefs{$k}";
 601         splice @$pname,$i,1,@repl;
 602       }
 603     }
 604   }
 605 }
 606
 607 sub write_txt {
 608   # This is called from the top level, so I won't bother using
 609   # my or local.
 610
 611   # Open file.
 612   print "writing file...";
 613   open TEXT, '>', File::Spec->catfile($out_path, 'nasmdoc.txt');
 614   select TEXT;
 615
 616   # Preamble.
 617   $title = $metadata{'title'};
 618   $spaces = ' ' x ((75-(length $title))/2);
 619   ($underscore = $title) =~ s/./=/g;
 620   print "$spaces$title\n$spaces$underscore\n";
 621
 622   for ($para = 0; $para <= $#pnames; $para++) {
 623     $pname = $pnames[$para];
 624     $pflags = $pflags[$para];
 625     $ptype = substr($pflags,0,4);
 626
 627     print "\n"; # always one of these before a new paragraph
 628
 629     if ($ptype eq "chap") {
 630       # Chapter heading. "Chapter N: Title" followed by a line of
 631       # minus signs.
 632       $pflags =~ /chap (.*) :(.*)/;
 633       $title = "Chapter $1: ";
 634       foreach $i (@$pname) {
 635         $ww = &word_txt($i);
 636         $title .= $ww unless $ww eq "\001";
 637       }
 638       print "$title\n";
 639       $title =~ s/./-/g;
 640       print "$title\n";
 641     } elsif ($ptype eq "appn") {
 642       # Appendix heading. "Appendix N: Title" followed by a line of
 643       # minus signs.
 644       $pflags =~ /appn (.*) :(.*)/;
 645       $title = "Appendix $1: ";
 646       foreach $i (@$pname) {
 647         $ww = &word_txt($i);
 648         $title .= $ww unless $ww eq "\001";
 649       }
 650       print "$title\n";
 651       $title =~ s/./-/g;
 652       print "$title\n";
 653     } elsif ($ptype eq "head" || $ptype eq "subh") {
 654       # Heading or subheading. Just a number and some text.
 655       $pflags =~ /.... (.*) :(.*)/;
 656       $title = sprintf "%6s ", $1;
 657       foreach $i (@$pname) {
 658         $ww = &word_txt($i);
 659         $title .= $ww unless $ww eq "\001";
 660       }
 661       print "$title\n";
 662     } elsif ($ptype eq "code") {
 663       # Code paragraph. Emit each line with a seven character indent.
 664       foreach $i (@$pname) {
 665         warn "code line longer than 68 chars: $i\n" if length $i > 68;
 666         print ' 'x7, $i, "\n";
 667       }
 668     } elsif ($ptype =~ /^(norm|bull|indt|bquo)$/) {
 669       # Ordinary paragraph, optionally indented. We wrap, with ragged
 670       # 75-char right margin and either 7 or 11 char left margin
 671       # depending on bullets.
 672       if ($ptype ne 'norm') {
 673           $line = ' 'x7 . (($ptype eq 'bull') ? '(*) ' : '    ');
 674           $next = ' 'x11;
 675       } else {
 676         $line = $next = ' 'x7;
 677       }
 678       @a = @$pname;
 679       $wd = $wprev = '';
 680       do {
 681         do { $w = &word_txt(shift @a) } while $w eq "\001"; # nasty hack
 682         $wd .= $wprev;
 683         if ($wprev =~ /-$/ || $w eq ' ' || $w eq '' || $w eq undef) {
 684           if (length ($line . $wd) > 75) {
 685             $line =~ s/\s*$//; # trim trailing spaces
 686             print "$line\n";
 687             $line = $next;
 688             $wd =~ s/^\s*//; # trim leading spaces
 689           }
 690           $line .= $wd;
 691           $wd = '';
 692         }
 693         $wprev = $w;
 694       } while ($w ne '' && $w ne undef);
 695       if ($line =~ /\S/) {
 696         $line =~ s/\s*$//; # trim trailing spaces
 697         print "$line\n";
 698       }
 699     }
 700   }
 701
 702   # Close file.
 703   select STDOUT;
 704   close TEXT;
 705 }
 706
 707 sub word_txt {
 708   my ($w) = @_;
 709   my $wtype, $wmajt;
 710
 711   return undef if $w eq '' || $w eq undef;
 712   $wtype = substr($w,0,2);
 713   $wmajt = substr($wtype,0,1);
 714   $w = substr($w,2);
 715   $w =~ s/<.*>// if $wmajt eq "w"; # remove web links
 716   if ($wmajt eq "n" || $wtype eq "e " || $wtype eq "w ") {
 717     return $w;
 718   } elsif ($wtype eq "sp") {
 719     return ' ';
 720   } elsif ($wtype eq 'da' || $wtype eq 'dm') {
 721     return '-';
 722   } elsif ($wmajt eq "c" || $wtype eq "wc") {
 723     return "`${w}'";
 724   } elsif ($wtype eq "es") {
 725     return "_${w}";
 726   } elsif ($wtype eq "ee") {
 727     return "${w}_";
 728   } elsif ($wtype eq "eo") {
 729     return "_${w}_";
 730   } elsif ($wmajt eq "x" || $wmajt eq "i") {
 731     return "\001";
 732   } else {
 733     die "panic in word_txt: $wtype$w\n";
 734   }
 735 }
 736
 737 sub write_html {
 738   # This is called from the top level, so I won't bother using
 739   # my or local.
 740
 741   # Write contents file. Just the preamble, then a menu of links to the
 742   # separate chapter files and the nodes therein.
 743   print "writing contents file...";
 744   open TEXT, '>', File::Spec->catfile($out_path, 'nasmdoc0.html');
 745   select TEXT;
 746   &html_preamble(0);
 747   print "<p>This manual documents NASM, the Netwide Assembler: an assembler\n";
 748   print "targetting the Intel x86 series of processors, with portable source.\n</p>";
 749   print "<div class=\"toc\">\n";
 750   $level = 0;
 751   for ($node = $tstruct_next{'Top'}; $node; $node = $tstruct_next{$node}) {
 752       my $lastlevel = $level;
 753       while ($tstruct_level{$node} < $level) {
 754           print "</li>\n</ol>\n";
 755           $level--;
 756       }
 757       while ($tstruct_level{$node} > $level) {
 758           print "<ol class=\"toc", ++$level, "\">\n";
 759       }
 760       if ($lastlevel >= $level) {
 761           print "</li>\n";
 762       }
 763       $level = $tstruct_level{$node};
 764       if ($level == 1) {
 765       # Invent a file name.
 766           ($number = lc($xrefnodes{$node})) =~ s/.*-//;
 767           $fname="nasmdocx.html";
 768           substr($fname,8 - length $number, length $number) = $number;
 769           $html_fnames{$node} = $fname;
 770           $link = $fname;
 771       } else {
 772           # Use the preceding filename plus a marker point.
 773           $link = $fname . "#$xrefnodes{$node}";
 774       }
 775       $title = '';
 776       $pname = $tstruct_pname{$node};
 777       foreach $i (@$pname) {
 778           $ww = &word_html($i);
 779           $title .= $ww unless $ww eq "\001";
 780       }
 781       print "<li class=\"toc${level}\">\n";
 782       print "<span class=\"node\">$node: </span><a href=\"$link\">$title</a>\n";
 783   }
 784   while ($level--) {
 785       print "</li>\n</ol>\n";
 786   }
 787   print "</div>\n";
 788   print "</body>\n";
 789   print "</html>\n";
 790   select STDOUT;
 791   close TEXT;
 792
 793   # Open a null file, to ensure output (eg random &html_jumppoints calls)
 794   # goes _somewhere_.
 795   print "writing chapter files...";
 796   open TEXT, '>', File::Spec->devnull();
 797   select TEXT;
 798   undef $html_nav_last;
 799   undef $html_nav_next;
 800
 801   $in_list = 0;
 802   $in_bquo = 0;
 803   $in_code = 0;
 804
 805   for ($para = 0; $para <= $#pnames; $para++) {
 806     $pname = $pnames[$para];
 807     $pflags = $pflags[$para];
 808     $ptype = substr($pflags,0,4);
 809
 810     $in_code = 0, print "</pre>\n" if ($in_code && $ptype ne 'code');
 811     $in_list = 0, print "</li>\n</ul>\n" if ($in_list && $ptype !~ /^(bull|indt|code)$/);
 812     $in_bquo = 0, print "</blockquote>\n" if ($in_bquo && $ptype ne 'bquo');
 813
 814     $endtag = '';
 815
 816     if ($ptype eq "chap") {
 817       # Chapter heading. Begin a new file.
 818       $pflags =~ /chap (.*) :(.*)/;
 819       $title = "Chapter $1: ";
 820       $xref = $2;
 821       &html_postamble; select STDOUT; close TEXT;
 822       $html_nav_last = $chapternode;
 823       $chapternode = $nodexrefs{$xref};
 824       $html_nav_next = $tstruct_mnext{$chapternode};
 825       open(TEXT, '>', File::Spec->catfile($out_path, $html_fnames{$chapternode}));
 826       select TEXT;
 827       &html_preamble(1);
 828       foreach $i (@$pname) {
 829         $ww = &word_html($i);
 830         $title .= $ww unless $ww eq "\001";
 831       }
 832       $h = "<h2 id=\"$xref\">$title</h2>\n";
 833       print $h; print FULL $h;
 834     } elsif ($ptype eq "appn") {
 835       # Appendix heading. Begin a new file.
 836       $pflags =~ /appn (.*) :(.*)/;
 837       $title = "Appendix $1: ";
 838       $xref = $2;
 839       &html_postamble; select STDOUT; close TEXT;
 840       $html_nav_last = $chapternode;
 841       $chapternode = $nodexrefs{$xref};
 842       $html_nav_next = $tstruct_mnext{$chapternode};
 843       open(TEXT, '>', File::Spec->catfile($out_path, $html_fnames{$chapternode}));
 844       select TEXT;
 845       &html_preamble(1);
 846       foreach $i (@$pname) {
 847         $ww = &word_html($i);
 848         $title .= $ww unless $ww eq "\001";
 849       }
 850       print "<h2 id=\"$xref\">$title</h2>\n";
 851     } elsif ($ptype eq "head" || $ptype eq "subh") {
 852       # Heading or subheading.
 853       $pflags =~ /.... (.*) :(.*)/;
 854       $hdr = ($ptype eq "subh" ? "h4" : "h3");
 855       $title = $1 . " ";
 856       $xref = $2;
 857       foreach $i (@$pname) {
 858         $ww = &word_html($i);
 859         $title .= $ww unless $ww eq "\001";
 860       }
 861       print "<$hdr id=\"$xref\">$title</$hdr>\n";
 862     } elsif ($ptype eq "code") {
 863         # Code paragraph.
 864         $in_code = 1, print "<pre>" unless $in_code;
 865         print "\n";
 866         foreach $i (@$pname) {
 867             $w = $i;
 868             $w =~ s/&/&amp;/g;
 869             $w =~ s/</&lt;/g;
 870             $w =~ s/>/&gt;/g;
 871             print $w, "\n";
 872         }
 873     } elsif ($ptype =~ /^(norm|bull|indt|bquo)$/) {
 874       # Ordinary paragraph, optionally indented.
 875         if ($ptype eq 'bull') {
 876             if (!$in_list) {
 877                 $in_list = 1;
 878                 print "<ul>\n";
 879             } else {
 880                 print "</li>\n";
 881             }
 882             print "<li>\n";
 883             $line = '<p>';
 884             $endtag = '</p>';
 885       } elsif ($ptype eq 'indt') {
 886           if (!$in_list) {
 887               $in_list = 1;
 888               print "<ul>\n";
 889               print "<li class=\"indt\">\n"; # This is such a hack
 890           }
 891           $line = '<p>';
 892           $endtag = '</p>';
 893       } elsif ($ptype eq 'bquo') {
 894           $in_bquo = 1, print "<blockquote>\n" unless $in_bquo;
 895           $line = '<p>';
 896           $endtag = '</p>';
 897       } else {
 898         $line = '<p>';
 899         $endtag = '</p>';
 900       }
 901       @a = @$pname;
 902       $wd = $wprev = '';
 903       do {
 904         do { $w = &word_html(shift @a) } while $w eq "\001"; # nasty hack
 905         $wd .= $wprev;
 906         if ($w eq ' ' || $w eq '' || $w eq undef) {
 907           if (length ($line . $wd) > 75) {
 908             $line =~ s/\s*$//; # trim trailing spaces
 909             print "$line\n";
 910             $line = '';
 911             $wd =~ s/^\s*//; # trim leading spaces
 912           }
 913           $line .= $wd;
 914           $wd = '';
 915         }
 916         $wprev = $w;
 917       } while ($w ne '' && $w ne undef);
 918       if ($line =~ /\S/) {
 919         $line =~ s/\s*$//; # trim trailing spaces
 920         print $line;
 921       }
 922       print $endtag, "\n";
 923     }
 924   }
 925
 926   # Close whichever file was open.
 927   print "</pre>\n" if ($in_code);
 928   print "</li>\n</ul>\n" if ($in_list);
 929   print "</blockquote>\n" if ($in_bquo);
 930   &html_postamble; select STDOUT; close TEXT;
 931
 932   print "\n   writing index file...";
 933   open TEXT, '>', File::Spec->catfile($out_path, 'nasmdoci.html');
 934   select TEXT;
 935   &html_preamble(0);
 936   print "<h2 class=\"index\">Index</h2>\n";
 937   print "<ul class=\"index\">\n";
 938   &html_index;
 939   print "</ul>\n</body>\n</html>\n";
 940   select STDOUT;
 941   close TEXT;
 942 }
 943
 944 sub html_preamble {
 945     print "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\" ?>\n";
 946     print "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" ";
 947     print "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n";
 948     print "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n";
 949     print "<head>\n";
 950     print "<title>", $metadata{'title'}, "</title>\n";
 951     print "<link href=\"nasmdoc.css\" rel=\"stylesheet\" type=\"text/css\" />\n";
 952     print "<link href=\"local.css\" rel=\"stylesheet\" type=\"text/css\" />\n";
 953     print "</head>\n";
 954     print "<body>\n";
 955
 956     # Navigation bar
 957     print "<ul class=\"navbar\">\n";
 958     if (defined($html_nav_last)) {
 959         my $lastf = $html_fnames{$html_nav_last};
 960         print "<li class=\"first\"><a class=\"prev\" href=\"$lastf\">$html_nav_last</a></li>\n";
 961     }
 962     if (defined($html_nav_next)) {
 963         my $nextf = $html_fnames{$html_nav_next};
 964         print "<li><a class=\"next\" href=\"$nextf\">$html_nav_next</a></li>\n";
 965     }
 966     print "<li><a class=\"toc\" href=\"nasmdoc0.html\">Contents</a></li>\n";
 967     print "<li class=\"last\"><a class=\"index\" href=\"nasmdoci.html\">Index</a></li>\n";
 968     print "</ul>\n";
 969
 970     print "<div class=\"title\">\n";
 971     print "<h1>", $metadata{'title'}, "</h1>\n";
 972     print '<span class="subtitle">', $metadata{'subtitle'}, "</span>\n";
 973     print "</div>\n";
 974     print "<div class=\"contents\"\n>\n";
 975 }
 976
 977 sub html_postamble {
 978     # Common closing tags
 979     print "</div>\n</body>\n</html>\n";
 980 }
 981
 982 sub html_index {
 983   my $itag, $a, @ientry, $sep, $w, $wd, $wprev, $line;
 984
 985   $chapternode = '';
 986   foreach $itag (@itags) {
 987     $ientry = $idxmap{$itag};
 988     @a = @$ientry;
 989     push @a, "n :";
 990     $sep = 0;
 991     foreach $node (@nodes) {
 992       next if !$idxnodes{$node,$itag};
 993       push @a, "n ," if $sep;
 994       push @a, "sp", "x $xrefnodes{$node}", "n $node", "xe$xrefnodes{$node}";
 995       $sep = 1;
 996     }
 997     print "<li class=\"index\">\n";
 998     $line = '';
 999     do {
1000       do { $w = &word_html(shift @a) } while $w eq "\001"; # nasty hack
1001       $wd .= $wprev;
1002       if ($w eq ' ' || $w eq '' || $w eq undef) {
1003         if (length ($line . $wd) > 75) {
1004           $line =~ s/\s*$//; # trim trailing spaces
1005           print "$line\n";
1006           $line = '';
1007           $wd =~ s/^\s*//; # trim leading spaces
1008         }
1009         $line .= $wd;
1010         $wd = '';
1011       }
1012       $wprev = $w;
1013     } while ($w ne '' && $w ne undef);
1014     if ($line =~ /\S/) {
1015       $line =~ s/\s*$//; # trim trailing spaces
1016       print $line, "\n";
1017     }
1018     print "</li>\n";
1019   }
1020 }
1021
# Render one word of the intermediate paragraph format as HTML.
#
# The argument is a string whose first two characters are a type code
# and whose remainder is the text.  Returns:
#   undef           if the argument is undefined or empty
#   "n?", "e ", "w "  the text, HTML-escaped
#   "sp"            a single space
#   "da" / "dm"     '&ndash;' / '&mdash;'
#   "c?", "wc"      the escaped text wrapped in <code>...</code>
#   "es" / "ee"     the escaped text with an opening / closing <em> tag
#   "eo"            the escaped text wrapped in <em>...</em>
#   "x "            an opening <a href="..."> for the cross-reference
#                   named by the text
#   "xe"            '</a>'
#   "i?"            "\001", a marker the callers in this file skip
# For "w?" words whose text has the form "<url>text", the result is
# additionally wrapped in <a href="url">...</a>.
# Dies with a "panic in word_html" message for any other type code.
#
# Reads the globals %nodexrefs, %tstruct_level, %tstruct_up,
# %html_fnames and $chapternode when resolving cross-references.
sub word_html {
    my ($w) = @_;

    return undef if !defined($w) || $w eq '';

    my $wtype = substr($w, 0, 2);
    my $wmajt = substr($wtype, 0, 1);
    $w = substr($w, 2);

    # Web links carry their target as a leading "<url>".
    my ($pfx, $sfx) = ('', '');
    if ($wmajt eq "w" && $w =~ /^<(.*)>(.*)$/) {
        ($pfx, $sfx, $w) = ("<a href=\"$1\">", "</a>", $2);
    }

    $w =~ s/&/&amp;/g;
    $w =~ s/</&lt;/g;
    $w =~ s/>/&gt;/g;

    if ($wmajt eq "n" || $wtype eq "e " || $wtype eq "w ") {
        return $pfx . $w . $sfx;
    } elsif ($wtype eq "sp") {
        return ' ';
    } elsif ($wtype eq "da") {
        return '&ndash;';
    } elsif ($wtype eq "dm") {
        return '&mdash;';
    } elsif ($wmajt eq "c" || $wtype eq "wc") {
        return $pfx . "<code>${w}</code>" . $sfx;
    } elsif ($wtype eq "es") {
        return "<em>${w}";
    } elsif ($wtype eq "ee") {
        return "${w}</em>";
    } elsif ($wtype eq "eo") {
        return "<em>${w}</em>";
    } elsif ($wtype eq "x ") {
        # Magic: we must resolve the cross reference into file and marker
        # parts, then dispose of the file part if it's us, and dispose of
        # the marker part if the cross reference describes the top node of
        # another file.
        my $node  = $nodexrefs{$w};         # find the node we're aiming at
        my $level = $tstruct_level{$node};  # and its level
        my $up    = $node;

        # Climb (level - 1) parent links to reach the top node of the
        # containing file.  The count is bounded below by zero so that a
        # level of 0, or an unknown node with no level at all, cannot
        # send the loop counting down from -1 forever.
        for (my $uplev = $level - 1; $uplev > 0; $uplev--) {
            $up = $tstruct_up{$up};
        }

        my $file   = ($up ne $chapternode) ? $html_fnames{$up} : "";
        my $marker = ($level == 1 and $file) ? "" : "#$w";
        return "<a href=\"$file$marker\">";
    } elsif ($wtype eq "xe") {
        return "</a>";
    } elsif ($wmajt eq "i") {
        return "\001";
    } else {
        die "panic in word_html: $wtype$w\n";
    }
}
1073
# Add a node to the document tree.  All %tstruct_* hashes, @tstruct_last
# and $tstruct_previtem are top-level globals.
#
#   $item   name of the node being added
#   $level  depth at which it is added
#
# For the new node this records the current global $pname, its level,
# its parent (the last node seen one level up), and links it after both
# the previously added node and the last node seen at the same level.
# Remembered "last node" slots for deeper levels, up to $MAXLEVEL, are
# cleared, and the node is appended to @nodes.
sub add_item {
    my ($item, $level) = @_;

    # Properties of the node itself
    $tstruct_level{$item} = $level;
    $tstruct_pname{$item} = $pname;
    $tstruct_up{$item}    = $tstruct_last[$level - 1];

    # Chain in document order
    $tstruct_prev{$item}              = $tstruct_previtem;
    $tstruct_next{$tstruct_previtem}  = $item;
    $tstruct_previtem                 = $item;

    # Chain among nodes of the same level
    $tstruct_mnext{ $tstruct_last[$level] } = $item;
    $tstruct_last[$level] = $item;

    # Anything deeper no longer has a "last" node
    $tstruct_last[$_] = undef for ($level + 1) .. ($MAXLEVEL - 1);

    push @nodes, $item;
}
1090
#
# This produces documentation intermediate paragraph format; this is
# basically the digested output of the front end.  Intended for use
# by future backends, instead of putting it all in the same script.
#
# The file nasmdoc.dip is created in $out_path and consists of pairs
# of lines:
#   "meta :<key>"  then the value, for each key of %metadata (sorted)
#   <flags>        then the paragraph's words, for each entry of
#                  @pflags / @pnames
#   "indx :<tag>"  then the index entry's words, for each tag in @itags
# Words on a line are separated by "\037" (ASCII unit separator).
#
# Dies if the file cannot be created or cannot be closed cleanly.
#
sub write_dip {
    my $dipfile = File::Spec->catfile($out_path, 'nasmdoc.dip');

    open(my $paras, '>', $dipfile)
        or die "$0: cannot create $dipfile: $!\n";

    foreach my $k (sort(keys(%metadata))) {
        print $paras 'meta :', $k, "\n";
        print $paras $metadata{$k}, "\n";
    }

    for my $para (0 .. $#pnames) {
        print $paras $pflags[$para], "\n";
        # The newline is printed after the joined words, not joined in
        # as one more word, so the line carries no trailing "\037" and
        # has the same shape as the index lines below.
        print $paras join("\037", @{$pnames[$para]}), "\n";
    }

    foreach my $k (@itags) {
        print $paras 'indx :', $k, "\n";
        print $paras join("\037", @{$idxmap{$k}}), "\n";
    }

    # Buffered write errors only show up at close time.
    close($paras)
        or die "$0: error writing $dipfile: $!\n";
}