doc: improve the look of the documentation with better fonts
[nasm.git] / doc / genps.pl
blob3cacdb9ab0b5733dce7907bdafc6178306d60121
1 #!/usr/bin/perl
2 ## --------------------------------------------------------------------------
3 ##
4 ## Copyright 1996-2017 The NASM Authors - All Rights Reserved
5 ## See the file AUTHORS included with the NASM distribution for
6 ## the specific copyright holders.
7 ##
8 ## Redistribution and use in source and binary forms, with or without
9 ## modification, are permitted provided that the following
10 ## conditions are met:
12 ## * Redistributions of source code must retain the above copyright
13 ## notice, this list of conditions and the following disclaimer.
14 ## * Redistributions in binary form must reproduce the above
15 ## copyright notice, this list of conditions and the following
16 ## disclaimer in the documentation and/or other materials provided
17 ## with the distribution.
19 ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20 ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
24 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31 ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 ## --------------------------------------------------------------------------
36 # Format the documentation as PostScript
39 use File::Spec;
41 require 'psfonts.ph'; # The fonts we want to use
42 require 'pswidth.ph'; # PostScript string width
43 require 'findfont.ph'; # Find fonts in the system
46 # PostScript configurables; these values are also available to the
47 # PostScript code itself
49 %psconf = (
50 pagewidth => 595, # Page width in PostScript points
51 pageheight => 792, # Page height in PostScript points
52 lmarg => 72*1.25, # Left margin in PostScript points
53 rmarg => 72, # Right margin in PostScript points
54 topmarg => 72, # Top margin in PostScript points
55 botmarg => 72, # Bottom margin in PostScript points
56 plmarg => 72*0.25, # Page number position relative to left margin
57 prmarg => 0, # Page number position relative to right margin
58 pymarg => 24, # Page number position relative to bot margin
59 startcopyright => 75, # How much above the bottom margin is the
60 # copyright notice stuff
61 bulladj => 12, # How much to indent a bullet paragraph
62 tocind => 12, # TOC indentation per level
63 tocpnz => 24, # Width of TOC page number only zone
64 tocdots => 8, # Spacing between TOC dots
65 idxspace => 24, # Minimum space between index title and pg#
66 idxindent => 24, # How much to indent a subindex entry
67 idxgutter => 24, # Space between index columns
68 idxcolumns => 2, # Number of index columns
71 %psbool = (
72 colorlinks => 0, # Set links in blue rather than black
75 # Known paper sizes
76 %papersizes = (
77 'a5' => [421, 595], # ISO half paper size
78 'b5' => [501, 709], # ISO small paper size
79 'a4' => [595, 842], # ISO standard paper size
80 'letter' => [612, 792], # US common paper size
81 'pa4' => [595, 792], # Compromise ("portable a4")
82 'b4' => [709,1002], # ISO intermediate paper size
83 'legal' => [612,1008], # US intermediate paper size
84 'a3' => [842,1190], # ISO double paper size
85 '11x17' => [792,1224], # US double paper size
88 # Canned header file
89 $headps = 'head.ps';
92 # Parse the command line
94 undef $input;
95 while ( $arg = shift(@ARGV) ) {
96 if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
97 $parm = $2;
98 $true = ($1 eq '') ? 1 : 0;
99 if ( $true && defined($papersizes{$parm}) ) {
100 $psconf{pagewidth} = $papersizes{$parm}->[0];
101 $psconf{pageheight} = $papersizes{$parm}->[1];
102 } elsif ( defined($psbool{$parm}) ) {
103 $psbool{$parm} = $true;
104 } elsif ( $true && defined($psconf{$parm}) ) {
105 $psconf{$parm} = shift(@ARGV);
106 } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
107 $metadata{$parm} = shift(@ARGV);
108 } elsif ( $parm =~ /^fontsdir$/ ) {
109 $fontsdir = shift(@ARGV);
110 } elsif ( $parm eq 'headps' ) {
111 $headps = shift(@ARGV);
112 } else {
113 die "$0: Unknown option: $arg\n";
115 } else {
116 $input = $arg;
121 # Document formatting parameters
123 $paraskip = 6; # Space between paragraphs
124 $chapstart = 30; # Space before a chapter heading
125 $chapskip = 24; # Space after a chapter heading
126 $tocskip = 6; # Space between TOC entries
128 # Configure post-paragraph skips for each kind of paragraph
129 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
130 'head' => $paraskip, 'subh' => $paraskip,
131 'norm' => $paraskip, 'bull' => $paraskip,
132 'code' => $paraskip, 'toc0' => $tocskip,
133 'toc1' => $tocskip, 'toc2' => $tocskip);
135 # Read the font metrics files, and update @AllFonts
136 # Get the list of fonts used
137 %ps_all_fonts = ();
138 %ps_font_subst = ();
139 foreach my $fset ( @AllFonts ) {
140 foreach my $font ( @{$fset->{fonts}} ) {
141 my $fdata;
142 my @flist = @{$font->[1]};
143 my $fname;
144 while (defined($fname = shift(@flist))) {
145 $fdata = findfont($fname);
146 last if (defined($fdata));
148 if (!defined($fdata)) {
149 die "$infile: no font found of: ".
150 join(', ', @{$font->[1]}), "\n".
151 "Install one of these fonts or update psfonts.ph\n";
153 $ps_all_fonts{$fname} = $fdata;
154 $font->[1] = $fdata;
158 # Custom encoding vector. This is basically the same as
159 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
160 # but with the "naked" accents at \200-\237 moved to the \000-\037
161 # range (ASCII control characters), and a few extra characters thrown
162 # in. It is basically a modified Windows 1252 codepage, minus, for
163 # now, the euro sign (\200 is reserved for euro.)
165 @NASMEncoding =
167 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
168 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
169 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
170 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
171 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
172 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
173 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
174 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
175 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
176 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
177 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
178 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
179 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
180 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
181 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
182 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
183 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
184 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
185 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
186 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
187 'scaron', 'guilsignlright', 'oe', undef, 'zcaron', 'Ydieresis',
188 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
189 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
190 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
191 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
192 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
193 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
194 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
195 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
196 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
197 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
198 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
199 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
200 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
201 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
202 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
203 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
204 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
205 'thorn', 'ydieresis'
208 # Name-to-byte lookup hash
209 %charcode = ();
210 for ( $i = 0 ; $i < 256 ; $i++ ) {
211 $charcode{$NASMEncoding[$i]} = chr($i);
215 # First, format the stuff coming from the front end into
216 # a cleaner representation
218 if ( defined($input) ) {
219 open(PARAS, '<', $input) or
220 die "$0: cannot open $input: $!\n";
221 } else {
222 # stdin
223 open(PARAS, '<-') or die "$0: $!\n";
225 while ( defined($line = <PARAS>) ) {
226 chomp $line;
227 $data = <PARAS>;
228 chomp $data;
229 if ( $line =~ /^meta :(.*)$/ ) {
230 $metakey = $1;
231 $metadata{$metakey} = $data;
232 } elsif ( $line =~ /^indx :(.*)$/ ) {
233 $ixentry = $1;
234 push(@ixentries, $ixentry);
235 $ixterms{$ixentry} = [split(/\037/, $data)];
236 # Look for commas. This is easier done on the string
237 # representation, so do it now.
238 if ( $data =~ /^(.*)\,\037sp\037/ ) {
239 $ixprefix = $1;
240 $ixprefix =~ s/\037n $//; # Discard possible font change at end
241 $ixhasprefix{$ixentry} = $ixprefix;
242 if ( !$ixprefixes{$ixprefix} ) {
243 $ixcommafirst{$ixentry}++;
245 $ixprefixes{$ixprefix}++;
246 } else {
247 # A complete term can also be used as a prefix
248 $ixprefixes{$data}++;
250 } else {
251 push(@ptypes, $line);
252 push(@paras, [split(/\037/, $data)]);
255 close(PARAS);
258 # Convert an integer to a chosen base
260 sub int2base($$) {
261 my($i,$b) = @_;
262 my($s) = '';
263 my($n) = '';
264 my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
265 return '0' if ($i == 0);
266 if ( $i < 0 ) { $n = '-'; $i = -$i; }
267 while ( $i ) {
268 $s = substr($z,$i%$b,1) . $s;
269 $i = int($i/$b);
271 return $n.$s;
275 # Convert a string to a rendering array
277 sub string2array($)
279 my($s) = @_;
280 my(@a) = ();
282 $s =~ s/\B\-\-\B/$charcode{'emdash'}/g;
283 $s =~ s/\B\-\B/ $charcode{'endash'} /g;
285 while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
286 push(@a, [0,$1]);
287 $s = $2;
290 return @a;
294 # Take a crossreference name and generate the PostScript name for it.
296 # This hack produces a somewhat smaller PDF...
297 #%ps_xref_list = ();
298 #$ps_xref_next = 0;
299 #sub ps_xref($) {
300 # my($s) = @_;
301 # my $q = $ps_xref_list{$s};
302 # return $q if ( defined($ps_xref_list{$s}) );
303 # $q = 'X'.int2base($ps_xref_next++, 52);
304 # $ps_xref_list{$s} = $q;
305 # return $q;
308 # Somewhat bigger PDF, but one which obeys # URLs
309 sub ps_xref($) {
310 return @_[0];
314 # Flow lines according to a particular font set and width
316 # A "font set" is represented as an array containing
317 # arrays of pairs: [<size>, <metricref>]
319 # Each line is represented as:
320 # [ [type,first|last,aux,fontset,page,ypos,optional col],
321 # [rendering array] ]
323 # A space character may be "squeezed" by up to this much
324 # (as a fraction of the normal width of a space.)
326 $ps_space_squeeze = 0.00; # Min space width 100%
327 sub ps_flow_lines($$$@) {
328 my($wid, $fontset, $type, @data) = @_;
329 my($fonts) = $$fontset{fonts};
330 my($e);
331 my($w) = 0; # Width of current line
332 my($sw) = 0; # Width of current line due to spaces
333 my(@l) = (); # Current line
334 my(@ls) = (); # Accumulated output lines
335 my(@xd) = (); # Metadata that goes with subsequent text
336 my $hasmarker = 0; # Line has -6 marker
337 my $pastmarker = 0; # -6 marker found
339 # If there is a -6 marker anywhere in the paragraph,
340 # *each line* output needs to have a -6 marker
341 foreach $e ( @data ) {
342 $hasmarker = 1 if ( $$e[0] == -6 );
345 $w = 0;
346 foreach $e ( @data ) {
347 if ( $$e[0] < 0 ) {
348 # Type is metadata. Zero width.
349 if ( $$e[0] == -6 ) {
350 $pastmarker = 1;
352 if ( $$e[0] == -1 || $$e[0] == -6 ) {
353 # -1 (end anchor) or -6 (marker) goes with the preceeding
354 # text, otherwise with the subsequent text
355 push(@l, $e);
356 } else {
357 push(@xd, $e);
359 } else {
360 my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
361 \@NASMEncoding) *
362 ($fontset->{fonts}->[$$e[0]][0]/1000);
363 my $sp = $$e[1];
364 $sp =~ tr/[^ ]//d; # Delete nonspaces
365 my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
366 \@NASMEncoding) *
367 ($fontset->{fonts}->[$$e[0]][0]/1000);
369 if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
370 # Begin new line
371 # Search backwards for previous space chunk
372 my $lx = scalar(@l)-1;
373 my @rm = ();
374 while ( $lx >= 0 ) {
375 while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
376 # Skip metadata
377 $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
378 $lx--;
380 if ( $lx >= 0 ) {
381 if ( $l[$lx]->[1] eq ' ' ) {
382 splice(@l, $lx, 1);
383 @rm = splice(@l, $lx);
384 last; # Found place to break
385 } else {
386 $lx--;
391 # Now @l contains the stuff to remain on the old line
392 # If we broke the line inside a link, then split the link
393 # into two.
394 my $lkref = undef;
395 foreach my $lc ( @l ) {
396 if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
397 $lkref = $lc;
398 } elsif ( $$lc[0] == -1 ) {
399 undef $lkref;
403 if ( defined($lkref) ) {
404 push(@l, [-1,undef]); # Terminate old reference
405 unshift(@rm, $lkref); # Duplicate reference on new line
408 if ( $hasmarker ) {
409 if ( $pastmarker ) {
410 unshift(@rm,[-6,undef]); # New line starts with marker
411 } else {
412 push(@l,[-6,undef]); # Old line ends with marker
416 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
417 @l = @rm;
419 $w = $sw = 0;
420 # Compute the width of the remainder array
421 for my $le ( @l ) {
422 if ( $$le[0] >= 0 ) {
423 my $xew = ps_width($$le[1],
424 $fontset->{fonts}->[$$le[0]][1],
425 \@NASMEncoding) *
426 ($fontset->{fonts}->[$$le[0]][0]/1000);
427 my $xsp = $$le[1];
428 $xsp =~ tr/[^ ]//d; # Delete nonspaces
429 my $xsw = ps_width($xsp,
430 $fontset->{fonts}->[$$le[0]][1],
431 \@NASMEncoding) *
432 ($fontset->{fonts}->[$$le[0]][0]/1000);
433 $w += $xew; $sw += $xsw;
437 push(@l, @xd); # Accumulated metadata
438 @xd = ();
439 if ( $$e[1] ne '' ) {
440 push(@l, $e);
441 $w += $ew; $sw += $esw;
445 push(@l,@xd);
446 if ( scalar(@l) ) {
447 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
450 # Mark the first line as first and the last line as last
451 if ( scalar(@ls) ) {
452 $ls[0]->[0]->[1] |= 1; # First in para
453 $ls[-1]->[0]->[1] |= 2; # Last in para
455 return @ls;
459 # Once we have broken things into lines, having multiple chunks
460 # with the same font index is no longer meaningful. Merge
461 # adjacent chunks to keep down the size of the whole file.
463 sub ps_merge_chunks(@) {
464 my(@ci) = @_;
465 my($c, $lc);
466 my(@co, $eco);
468 undef $lc;
469 @co = ();
470 $eco = -1; # Index of the last entry in @co
471 foreach $c ( @ci ) {
472 if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
473 $co[$eco]->[1] .= $$c[1];
474 } else {
475 push(@co, $c); $eco++;
476 $lc = $$c[0];
479 return @co;
483 # Convert paragraphs to rendering arrays. Each
484 # element in the array contains (font, string),
485 # where font can be one of:
486 # -1 end link
487 # -2 begin crossref
488 # -3 begin weblink
489 # -4 index item anchor
490 # -5 crossref anchor
491 # -6 left/right marker (used in the index)
492 # -7 page link (used in the index)
493 # 0 normal
494 # 1 empatic (italic)
495 # 2 code (fixed spacing)
498 sub mkparaarray($@) {
499 my($ptype, @chunks) = @_;
501 my @para = ();
502 my $in_e = 0;
503 my $chunk;
505 if ( $ptype =~ /^code/ ) {
506 foreach $chunk ( @chunks ) {
507 push(@para, [2, $chunk]);
509 } else {
510 foreach $chunk ( @chunks ) {
511 my $type = substr($chunk,0,2);
512 my $text = substr($chunk,2);
514 if ( $type eq 'sp' ) {
515 push(@para, [$in_e?1:0, ' ']);
516 } elsif ( $type eq 'da' ) {
517 push(@para, [$in_e?1:0, $charcode{'endash'}]);
518 } elsif ( $type eq 'n ' ) {
519 push(@para, [0, $text]);
520 $in_e = 0;
521 } elsif ( $type =~ '^e' ) {
522 push(@para, [1, $text]);
523 $in_e = ($type eq 'es' || $type eq 'e ');
524 } elsif ( $type eq 'c ' ) {
525 push(@para, [2, $text]);
526 $in_e = 0;
527 } elsif ( $type eq 'x ' ) {
528 push(@para, [-2, ps_xref($text)]);
529 } elsif ( $type eq 'xe' ) {
530 push(@para, [-1, undef]);
531 } elsif ( $type eq 'wc' || $type eq 'w ' ) {
532 $text =~ /\<(.*)\>(.*)$/;
533 my $link = $1; $text = $2;
534 push(@para, [-3, $link]);
535 push(@para, [($type eq 'wc') ? 2:0, $text]);
536 push(@para, [-1, undef]);
537 $in_e = 0;
538 } elsif ( $type eq 'i ' ) {
539 push(@para, [-4, $text]);
540 } else {
541 die "Unexpected paragraph chunk: $chunk";
545 return @para;
548 $npara = scalar(@paras);
549 for ( $i = 0 ; $i < $npara ; $i++ ) {
550 $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
554 # This converts a rendering array to a simple string
556 sub ps_arraytostr(@) {
557 my $s = '';
558 my $c;
559 foreach $c ( @_ ) {
560 $s .= $$c[1] if ( $$c[0] >= 0 );
562 return $s;
566 # This generates a duplicate of a paragraph
568 sub ps_dup_para(@) {
569 my(@i) = @_;
570 my(@o) = ();
571 my($c);
573 foreach $c ( @i ) {
574 my @cc = @{$c};
575 push(@o, [@cc]);
577 return @o;
581 # This generates a duplicate of a paragraph, stripping anchor
582 # tags (-4 and -5)
584 sub ps_dup_para_noanchor(@) {
585 my(@i) = @_;
586 my(@o) = ();
587 my($c);
589 foreach $c ( @i ) {
590 my @cc = @{$c};
591 push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
593 return @o;
597 # Scan for header paragraphs and fix up their contents;
598 # also generate table of contents and PDF bookmarks.
600 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
601 @tocptypes = ('chap');
602 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
603 %bookref = ();
604 for ( $i = 0 ; $i < $npara ; $i++ ) {
605 my $xtype = $ptypes[$i];
606 my $ptype = substr($xtype,0,4);
607 my $str;
608 my $book;
610 if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
611 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
612 die "Bad para";
614 my $secn = $1;
615 my $sech = $2;
616 my $xref = ps_xref($sech);
617 my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
619 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
620 push(@bookmarks, $book);
621 $bookref{$secn} = $book;
623 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
624 push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
626 unshift(@{$paras[$i]},
627 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
628 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
629 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
630 die "Bad para";
632 my $secn = $1;
633 my $sech = $2;
634 my $xref = ps_xref($sech);
635 my $pref;
636 $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
638 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
639 push(@bookmarks, $book);
640 $bookref{$secn} = $book;
641 $bookref{$pref}->[1]--; # Adjust count for parent node
643 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
644 push(@tocptypes,
645 (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
647 unshift(@{$paras[$i]}, [-5, $xref]);
652 # Add TOC to beginning of paragraph list
654 unshift(@paras, @tocparas); undef @tocparas;
655 unshift(@ptypes, @tocptypes); undef @tocptypes;
658 # Add copyright notice to the beginning
660 @copyright_page =
661 ([[0, $charcode{'copyright'}],
662 [0, ' '], [0, $metadata{'year'}],
663 [0, ' '], string2array($metadata{'author'}),
664 [0, ' '], string2array($metadata{'copyright_tail'})],
665 [string2array($metadata{'license'})],
666 [string2array($metadata{'auxinfo'})]);
668 unshift(@paras, @copyright_page);
669 unshift(@ptypes, ('norm') x scalar(@copyright_page));
671 $npara = scalar(@paras);
674 # No lines generated, yet.
676 @pslines = ();
679 # Line Auxilliary Information Types
681 $AuxStr = 1; # String
682 $AuxPage = 2; # Page number (from xref)
683 $AuxPageStr = 3; # Page number as a PostScript string
684 $AuxXRef = 4; # Cross reference as a name
685 $AuxNum = 5; # Number
688 # Break or convert paragraphs into lines, and push them
689 # onto the @pslines array.
691 sub ps_break_lines($$) {
692 my ($paras,$ptypes) = @_;
694 my $linewidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
695 my $bullwidth = $linewidth-$psconf{bulladj};
696 my $indxwidth = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
697 -$psconf{idxspace};
699 my $npara = scalar(@{$paras});
700 my $i;
702 for ( $i = 0 ; $i < $npara ; $i++ ) {
703 my $xtype = $ptypes->[$i];
704 my $ptype = substr($xtype,0,4);
705 my @data = @{$paras->[$i]};
706 my @ls = ();
707 if ( $ptype eq 'code' ) {
708 my $p;
709 # Code paragraph; each chunk is a line
710 foreach $p ( @data ) {
711 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
713 $ls[0]->[0]->[1] |= 1; # First in para
714 $ls[-1]->[0]->[1] |= 2; # Last in para
715 } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
716 # Chapters are flowed normally, but in an unusual font
717 @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
718 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
719 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
720 die "Bad para";
722 my $secn = $1;
723 my $sech = $2;
724 my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
725 @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
726 # We need the heading number as auxillary data
727 $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
728 } elsif ( $ptype eq 'norm' ) {
729 @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
730 } elsif ( $ptype eq 'bull' ) {
731 @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
732 } elsif ( $ptype =~ /^toc/ ) {
733 unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
734 die "Bad para";
736 my $xref = $1;
737 my $refname = $2.' ';
738 my $ntoc = substr($ptype,3,1)+0;
739 my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
740 \@NASMEncoding) *
741 ($BodyFont{fonts}->[0][0]/1000);
743 @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
744 $psconf{tocpnz}-$refwidth,
745 \%BodyFont, $ptype, @data);
747 # Auxilliary data: for the first line, the cross reference symbol
748 # and the reference name; for all lines but the first, the
749 # reference width; and for the last line, the page number
750 # as a string.
751 my $nl = scalar(@ls);
752 $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
753 for ( $j = 1 ; $j < $nl ; $j++ ) {
754 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
756 push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
757 } elsif ( $ptype =~ /^idx/ ) {
758 my $lvl = substr($ptype,3,1)+0;
760 @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
761 \%BodyFont, $ptype, @data);
762 } else {
763 die "Unknown para type: $ptype";
765 # Merge adjacent identical chunks
766 foreach $l ( @ls ) {
767 @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
769 push(@pslines,@ls);
773 # Break the main body text into lines.
774 ps_break_lines(\@paras, \@ptypes);
777 # Break lines in to pages
780 # Where to start on page 2, the copyright page
781 $curpage = 2; # Start on page 2
782 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
783 $psconf{startcopyright};
784 undef $columnstart; # Not outputting columnar text
785 undef $curcolumn; # Current column
786 $nlines = scalar(@pslines);
789 # This formats lines inside the global @pslines array into pages,
790 # updating the page and y-coordinate entries. Start at the
791 # $startline position in @pslines and go to but not including
792 # $endline. The global variables $curpage, $curypos, $columnstart
793 # and $curcolumn are updated appropriately.
795 sub ps_break_pages($$) {
796 my($startline, $endline) = @_;
798 # Paragraph types which should never be broken
799 my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
800 # Paragraph types which are heading (meaning they should not be broken
801 # immediately after)
802 my $nobreakafter = "^(chap|appn|head|subh)\$";
803 # Paragraph types which should never be broken *before*
804 my $nobreakbefore = "^idx[1-9]\$";
805 # Paragraph types which are set in columnar format
806 my $columnregexp = "^idx.\$";
808 my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
810 my $i;
812 for ( $i = $startline ; $i < $endline ; $i++ ) {
813 my $linfo = $pslines[$i]->[0];
814 if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
815 && ($$linfo[1] & 1) ) {
816 # First line of a new chapter heading. Start a new page.
817 undef $columnstart;
818 $curpage++ if ( $curypos > 0 || defined($columnstart) );
819 $curypos = $chapstart;
820 } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
821 undef $columnstart;
822 $curpage++;
823 $curypos = 0;
826 if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
827 $columnstart = $curypos;
828 $curcolumn = 0;
831 # Adjust position by the appropriate leading
832 $curypos += $$linfo[3]->{leading};
834 # Record the page and y-position
835 $$linfo[4] = $curpage;
836 $$linfo[5] = $curypos;
837 $$linfo[6] = $curcolumn if ( defined($columnstart) );
839 if ( $curypos > $upageheight ) {
840 # We need to break the page before this line.
841 my $broken = 0; # No place found yet
842 while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
843 my $linfo = $pslines[$i]->[0];
844 my $pinfo = $pslines[$i-1]->[0];
846 if ( $$linfo[1] == 2 ) {
847 # This would be an orphan, don't break.
848 } elsif ( $$linfo[1] & 1 ) {
849 # Sole line or start of paragraph. Break unless
850 # the previous line was part of a heading.
851 $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
852 $$linfo[0] !~ /$nobreakbefore/o );
853 } else {
854 # Middle of paragraph. Break unless we're in a
855 # no-break paragraph, or the previous line would
856 # end up being a widow.
857 $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
858 $$pinfo[1] != 1 );
860 $i--;
862 die "Nowhere to break page $curpage\n" if ( !$broken );
863 # Now $i should point to line immediately before the break, i.e.
864 # the next paragraph should be the first on the new page
865 if ( defined($columnstart) &&
866 ++$curcolumn < $psconf{idxcolumns} ) {
867 # We're actually breaking text into columns, not pages
868 $curypos = $columnstart;
869 } else {
870 undef $columnstart;
871 $curpage++;
872 $curypos = 0;
874 next;
877 # Add end of paragraph skip
878 if ( $$linfo[1] & 2 ) {
879 $curypos += $skiparray{$$linfo[0]};
884 ps_break_pages(0,$nlines); # Break the main text body into pages
887 # Find the page number of all the indices
889 %ps_xref_page = (); # Crossref anchor pages
890 %ps_index_pages = (); # Index item pages
891 $nlines = scalar(@pslines);
892 for ( $i = 0 ; $i < $nlines ; $i++ ) {
893 my $linfo = $pslines[$i]->[0];
894 foreach my $c ( @{$pslines[$i]->[1]} ) {
895 if ( $$c[0] == -4 ) {
896 if ( !defined($ps_index_pages{$$c[1]}) ) {
897 $ps_index_pages{$$c[1]} = [];
898 } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
899 # Pages are emitted in order; if this is a duplicated
900 # entry it will be the last one
901 next; # Duplicate
903 push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
904 } elsif ( $$c[0] == -5 ) {
905 $ps_xref_page{$$c[1]} = $$linfo[4];
911 # Emit index paragraphs
913 $startofindex = scalar(@pslines);
914 @ixparas = ([[-5,'index'],[0,'Index']]);
915 @ixptypes = ('chap');
917 foreach $k ( @ixentries ) {
918 my $n,$i;
919 my $ixptype = 'idx0';
920 my $prefix = $ixhasprefix{$k};
921 my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
922 my $commapos = undef;
924 if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
925 # This entry has a "hanging comma"
926 for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
927 if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
928 $ixpara[$i+1]->[1] eq ' ' ) {
929 $commapos = $i;
930 last;
934 if ( defined($commapos) ) {
935 if ( $ixcommafirst{$k} ) {
936 # This is the first entry; generate the
937 # "hanging comma" entry
938 my @precomma = splice(@ixpara,0,$commapos);
939 if ( $ixpara[0]->[1] eq ',' ) {
940 shift(@ixpara); # Discard lone comma
941 } else {
942 # Discard attached comma
943 $ixpara[0]->[1] =~ s/\,$//;
944 push(@precomma,shift(@ixpara));
946 push(@precomma, [-6,undef]);
947 push(@ixparas, [@precomma]);
948 push(@ixptypes, $ixptype);
949 shift(@ixpara); # Remove space
950 } else {
951 splice(@ixpara,0,$commapos+2);
953 $ixptype = 'idx1';
956 push(@ixpara, [-6,undef]); # Left/right marker
957 $i = 1; $n = scalar(@{$ps_index_pages{$k}});
958 foreach $p ( @{$ps_index_pages{$k}} ) {
959 if ( $i++ == $n ) {
960 push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
961 } else {
962 push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
966 push(@ixparas, [@ixpara]);
967 push(@ixptypes, $ixptype);
971 # Flow index paragraphs into lines
973 ps_break_lines(\@ixparas, \@ixptypes);
976 # Format index into pages
978 $nlines = scalar(@pslines);
979 ps_break_pages($startofindex, $nlines);
982 # Push index onto bookmark list
984 push(@bookmarks, ['index', 0, 'Index']);
986 @all_fonts_lst = sort(keys(%ps_all_fonts));
987 $all_fonts_str = join(' ', @all_fonts_lst);
988 @need_fonts_lst = ();
989 foreach my $f (@all_fonts_lst) {
990 push(@need_fonts_lst, $f); # unless (defined($ps_all_fonts{$f}->{file}));
992 $need_fonts_str = join(' ', @need_fonts_lst);
994 # Emit the PostScript DSC header
995 print "%!PS-Adobe-3.0\n";
996 print "%%Pages: $curpage\n";
997 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
998 print "%%Creator: (NASM psflow.pl)\n";
999 print "%%DocumentData: Clean7Bit\n";
1000 print "%%DocumentFonts: $all_fonts_str\n";
1001 print "%%DocumentNeededFonts: $need_fonts_str\n";
1002 print "%%Orientation: Portrait\n";
1003 print "%%PageOrder: Ascend\n";
1004 print "%%EndComments\n";
1005 print "%%BeginProlog\n";
1007 # Emit the configurables as PostScript tokens
1008 foreach $c ( keys(%psconf) ) {
1009 print "/$c ", $psconf{$c}, " def\n";
1011 foreach $c ( keys(%psbool) ) {
1012 print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
1015 # Embed font data, if applicable
1016 #foreach my $f (@all_fonts_lst) {
1017 # my $fontfile = $all_ps_fonts{$f}->{file};
1018 # if (defined($fontfile)) {
1019 # if (open(my $fh, '<', $fontfile)) {
1020 # print vector <$fh>;
1021 # close($fh);
1026 # Emit custom encoding vector
1027 $zstr = '/NASMEncoding [ ';
1028 foreach $c ( @NASMEncoding ) {
1029 my $z = '/'.(defined($c)?$c:'.notdef ').' ';
1030 if ( length($zstr)+length($z) > 72 ) {
1031 print $zstr,"\n";
1032 $zstr = ' ';
1034 $zstr .= $z;
1036 print $zstr, "] def\n";
1038 # Font recoding routine
1039 # newname fontname --
1040 print "/nasmenc {\n";
1041 print " findfont dup length dict begin\n";
1042 print " { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
1043 print " /Encoding NASMEncoding def\n";
1044 print " currentdict\n";
1045 print " end\n";
1046 print " definefont pop\n";
1047 print "} def\n";
1049 # Emit fontset definitions
1050 foreach $font ( sort(keys(%ps_all_fonts)) ) {
1051 print '/',$font,'-NASM /',$font," nasmenc\n";
1054 foreach $fset ( @AllFonts ) {
1055 my $i = 0;
1056 my @zfonts = ();
1057 foreach $font ( @{$fset->{fonts}} ) {
1058 print '/', $fset->{name}, $i, ' ',
1059 '/', $font->[1]->{name}, '-NASM findfont ',
1060 $font->[0], " scalefont def\n";
1061 push(@zfonts, $fset->{name}.$i);
1062 $i++;
1064 print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
1067 # This is used by the bullet-paragraph PostScript methods
1068 print "/bullet [",ps_string($charcode{'bullet'}),"] def\n";
1070 # Emit the canned PostScript prologue
1071 open(PSHEAD, '<', $headps)
1072 or die "$0: cannot open: $headps: $!\n";
1073 while ( defined($line = <PSHEAD>) ) {
1074 print $line;
1076 close(PSHEAD);
1077 print "%%EndProlog\n";
1079 # Generate a PostScript string
1080 sub ps_string($) {
1081 my ($s) = @_;
1082 my ($i,$c);
1083 my ($o) = '(';
1084 my ($l) = length($s);
1085 for ( $i = 0 ; $i < $l ; $i++ ) {
1086 $c = substr($s,$i,1);
1087 if ( ord($c) < 32 || ord($c) > 126 ) {
1088 $o .= sprintf("\\%03o", ord($c));
1089 } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1090 $o .= "\\".$c;
1091 } else {
1092 $o .= $c;
1095 return $o.')';
1098 # Generate PDF bookmarks
1099 print "%%BeginSetup\n";
1100 foreach $b ( @bookmarks ) {
1101 print '[/Title ', ps_string($b->[2]), "\n";
1102 print '/Count ', $b->[1], ' ' if ( $b->[1] );
1103 print '/Dest /',$b->[0]," /OUT pdfmark\n";
1106 # Ask the PostScript interpreter for the proper size media
1107 print "setpagesize\n";
1108 print "%%EndSetup\n";
1110 # Start a PostScript page
1111 sub ps_start_page() {
1112 $ps_page++;
1113 print "%%Page: $ps_page $ps_page\n";
1114 print "%%BeginPageSetup\n";
1115 print "save\n";
1116 print "%%EndPageSetup\n";
1117 print '/', $ps_page, " pa\n";
1120 # End a PostScript page
1121 sub ps_end_page($) {
1122 my($pn) = @_;
1123 if ( $pn ) {
1124 print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1126 print "restore showpage\n";
1129 $ps_page = 0;
1131 # Title page
1132 ps_start_page();
1133 $title = $metadata{'title'} || '';
1134 $title =~ s/ \- / $charcode{'emdash'} /;
1136 $subtitle = $metadata{'subtitle'} || '';
1137 $subtitle =~ s/ \- / $charcode{'emdash'} /;
1139 # Print title
1140 print "/ti ", ps_string($title), " def\n";
1141 print "/sti ", ps_string($subtitle), " def\n";
1142 print "lmarg pageheight 2 mul 3 div moveto\n";
1143 print "tfont0 setfont\n";
1144 print "/title linkdest ti show\n";
1145 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1146 print "0 setlinecap 3 setlinewidth\n";
1147 print "pagewidth lmarg sub rmarg sub 0 rlineto currentpoint stroke moveto\n";
1148 print "hfont1 setfont sti stringwidth pop neg ",
1149 -$HeadFont{leading}, " rmoveto\n";
1150 print "sti show\n";
1152 # Print logo, if there is one
1153 # FIX: To be 100% correct, this should look for DocumentNeeded*
1154 # and DocumentFonts in the header of the EPSF and add those to the
1155 # global header.
1156 if ( defined($metadata{epslogo}) &&
1157 open(EPS, '<', $metadata{epslogo}) ) {
1158 my @eps = ();
1159 my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1160 my $line;
1161 my $scale = 1;
1162 my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1163 my $maxheight = $psconf{pageheight}/3-40;
1164 my $width, $height;
1165 my $x, $y;
1167 while ( defined($line = <EPS>) ) {
1168 last if ( $line =~ /^%%EOF/ );
1169 if ( !defined($bbllx) &&
1170 $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1171 $bbllx = $1+0; $bblly = $2+0;
1172 $bburx = $3+0; $bbury = $4+0;
1174 push(@eps,$line);
1176 close(EPS);
1178 if ( defined($bbllx) ) {
1179 $width = $bburx-$bbllx;
1180 $height = $bbury-$bblly;
1182 if ( $width > $maxwidth ) {
1183 $scale = $maxwidth/$width;
1185 if ( $height*$scale > $maxheight ) {
1186 $scale = $maxheight/$height;
1189 $x = ($psconf{pagewidth}-$width*$scale)/2;
1190 $y = ($psconf{pageheight}-$height*$scale)/2;
1192 if ( defined($metadata{logoxadj}) ) {
1193 $x += $metadata{logoxadj};
1195 if ( defined($metadata{logoyadj}) ) {
1196 $y += $metadata{logoyadj};
1199 print "BeginEPSF\n";
1200 print $x, ' ', $y, " translate\n";
1201 print $scale, " dup scale\n" unless ( $scale == 1 );
1202 print -$bbllx, ' ', -$bblly, " translate\n";
1203 print "$bbllx $bblly moveto\n";
1204 print "$bburx $bblly lineto\n";
1205 print "$bburx $bbury lineto\n";
1206 print "$bbllx $bbury lineto\n";
1207 print "$bbllx $bblly lineto clip newpath\n";
1208 print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1209 print @eps;
1210 print "%%EndDocument\n";
1211 print "EndEPSF\n";
1214 ps_end_page(0);
1216 # Emit the rest of the document (page 2 and on)
1217 $curpage = 2;
1218 ps_start_page();
1219 foreach $line ( @pslines ) {
1220 my $linfo = $line->[0];
1222 if ( $$linfo[4] != $curpage ) {
1223 ps_end_page($curpage > 2);
1224 ps_start_page();
1225 $curpage = $$linfo[4];
1228 print '[';
1229 my $curfont = 0;
1230 foreach my $c ( @{$line->[1]} ) {
1231 if ( $$c[0] >= 0 ) {
1232 if ( $curfont != $$c[0] ) {
1233 print ($curfont = $$c[0]);
1235 print ps_string($$c[1]);
1236 } elsif ( $$c[0] == -1 ) {
1237 print '{el}'; # End link
1238 } elsif ( $$c[0] == -2 ) {
1239 print '{/',$$c[1],' xl}'; # xref link
1240 } elsif ( $$c[0] == -3 ) {
1241 print '{',ps_string($$c[1]),'wl}'; # web link
1242 } elsif ( $$c[0] == -4 ) {
1243 # Index anchor -- ignore
1244 } elsif ( $$c[0] == -5 ) {
1245 print '{/',$$c[1],' xa}'; #xref anchor
1246 } elsif ( $$c[0] == -6 ) {
1247 print ']['; # Start a new array
1248 $curfont = 0;
1249 } elsif ( $$c[0] == -7 ) {
1250 print '{/',$$c[1],' pl}'; # page link
1251 } else {
1252 die "Unknown annotation";
1255 print ']';
1256 if ( defined($$linfo[2]) ) {
1257 foreach my $x ( @{$$linfo[2]} ) {
1258 if ( $$x[0] == $AuxStr ) {
1259 print ps_string($$x[1]);
1260 } elsif ( $$x[0] == $AuxPage ) {
1261 print $ps_xref_page{$$x[1]},' ';
1262 } elsif ( $$x[0] == $AuxPageStr ) {
1263 print ps_string($ps_xref_page{$$x[1]});
1264 } elsif ( $$x[0] == $AuxXRef ) {
1265 print '/',ps_xref($$x[1]),' ';
1266 } elsif ( $$x[0] == $AuxNum ) {
1267 print $$x[1],' ';
1268 } else {
1269 die "Unknown auxilliary data type";
1273 print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1274 print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1275 print ' ', $$linfo[0].$$linfo[1], "\n";
1278 ps_end_page(1);
1279 print "%%EOF\n";