preproc: return found_path as a constant string
[nasm.git] / doc / genps.pl
blob86d10007bfeb376c23a0e01af6797122e33d2a82
1 #!/usr/bin/perl
2 ## --------------------------------------------------------------------------
3 ##
4 ## Copyright 1996-2016 The NASM Authors - All Rights Reserved
5 ## See the file AUTHORS included with the NASM distribution for
6 ## the specific copyright holders.
7 ##
8 ## Redistribution and use in source and binary forms, with or without
9 ## modification, are permitted provided that the following
10 ## conditions are met:
12 ## * Redistributions of source code must retain the above copyright
13 ## notice, this list of conditions and the following disclaimer.
14 ## * Redistributions in binary form must reproduce the above
15 ## copyright notice, this list of conditions and the following
16 ## disclaimer in the documentation and/or other materials provided
17 ## with the distribution.
18 ##
19 ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20 ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
24 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31 ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 ## --------------------------------------------------------------------------
36 # Format the documentation as PostScript
39 require 'psfonts.ph'; # The fonts we want to use
40 require 'pswidth.ph'; # PostScript string width
42 use Fcntl;
45 # PostScript configurables; these values are also available to the
46 # PostScript code itself
48 %psconf = (
49 pagewidth => 595, # Page width in PostScript points
50 pageheight => 792, # Page height in PostScript points
51 lmarg => 100, # Left margin in PostScript points
52 rmarg => 50, # Right margin in PostScript points
53 topmarg => 100, # Top margin in PostScript points
54 botmarg => 100, # Bottom margin in PostScript points
55 plmarg => 50, # Page number position relative to left margin
56 prmarg => 0, # Page number position relative to right margin
57 pymarg => 50, # Page number position relative to bot margin
58 startcopyright => 75, # How much above the bottom margin is the
59 # copyright notice stuff
60 bulladj => 12, # How much to indent a bullet paragraph
61 tocind => 12, # TOC indentation per level
62 tocpnz => 24, # Width of TOC page number only zone
63 tocdots => 8, # Spacing between TOC dots
64 idxspace => 24, # Minimum space between index title and pg#
65 idxindent => 24, # How much to indent a subindex entry
66 idxgutter => 24, # Space between index columns
67 idxcolumns => 2, # Number of index columns
70 %psbool = (
71 colorlinks => 0, # Set links in blue rather than black
74 # Known paper sizes
75 %papersizes = (
76 'a5' => [421, 595], # ISO half paper size
77 'b5' => [501, 709], # ISO small paper size
78 'a4' => [595, 842], # ISO standard paper size
79 'letter' => [612, 792], # US common paper size
80 'pa4' => [595, 792], # Compromise ("portable a4")
81 'b4' => [709,1002], # ISO intermediate paper size
82 'legal' => [612,1008], # US intermediate paper size
83 'a3' => [842,1190], # ISO double paper size
84 '11x17' => [792,1224], # US double paper size
87 # Canned header file
88 $headps = 'head.ps';
91 # Parse the command line
93 undef $input;
94 while ( $arg = shift(@ARGV) ) {
95 if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
96 $parm = $2;
97 $true = ($1 eq '') ? 1 : 0;
98 if ( $true && defined($papersizes{$parm}) ) {
99 $psconf{pagewidth} = $papersizes{$parm}->[0];
100 $psconf{pageheight} = $papersizes{$parm}->[1];
101 } elsif ( defined($psbool{$parm}) ) {
102 $psbool{$parm} = $true;
103 } elsif ( $true && defined($psconf{$parm}) ) {
104 $psconf{$parm} = shift(@ARGV);
105 } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
106 $metadata{$parm} = shift(@ARGV);
107 } elsif ( $parm eq 'headps' ) {
108 $headps = shift(@ARGV);
109 } else {
110 die "$0: Unknown option: $arg\n";
112 } else {
113 $input = $arg;
118 # Document formatting parameters
120 $paraskip = 6; # Space between paragraphs
121 $chapstart = 30; # Space before a chapter heading
122 $chapskip = 24; # Space after a chapter heading
123 $tocskip = 6; # Space between TOC entries
125 # Configure post-paragraph skips for each kind of paragraph
126 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
127 'head' => $paraskip, 'subh' => $paraskip,
128 'norm' => $paraskip, 'bull' => $paraskip,
129 'code' => $paraskip, 'toc0' => $tocskip,
130 'toc1' => $tocskip, 'toc2' => $tocskip);
132 # Custom encoding vector. This is basically the same as
133 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
134 # but with the "naked" accents at \200-\237 moved to the \000-\037
135 # range (ASCII control characters), and a few extra characters thrown
136 # in. It is basically a modified Windows 1252 codepage, minus, for
137 # now, the euro sign (\200 is reserved for euro.)
139 @NASMEncoding =
141 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
142 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
143 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
144 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
145 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
146 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
147 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
148 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
149 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
150 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
151 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
152 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
153 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
154 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
155 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
156 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
157 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
158 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
159 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
160 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
161 'scaron', 'guilsignlright', 'oe', undef, 'zcaron', 'Ydieresis',
162 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
163 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
164 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
165 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
166 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
167 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
168 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
169 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
170 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
171 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
172 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
173 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
174 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
175 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
176 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
177 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
178 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
179 'thorn', 'ydieresis'
182 # Name-to-byte lookup hash
183 %charcode = ();
184 for ( $i = 0 ; $i < 256 ; $i++ ) {
185 $charcode{$NASMEncoding[$i]} = chr($i);
189 # First, format the stuff coming from the front end into
190 # a cleaner representation
192 if ( defined($input) ) {
193 sysopen(PARAS, $input, O_RDONLY) or
194 die "$0: cannot open $input: $!\n";
195 } else {
196 open(PARAS, "<&STDIN") or die "$0: $!\n";
198 while ( defined($line = <PARAS>) ) {
199 chomp $line;
200 $data = <PARAS>;
201 chomp $data;
202 if ( $line =~ /^meta :(.*)$/ ) {
203 $metakey = $1;
204 $metadata{$metakey} = $data;
205 } elsif ( $line =~ /^indx :(.*)$/ ) {
206 $ixentry = $1;
207 push(@ixentries, $ixentry);
208 $ixterms{$ixentry} = [split(/\037/, $data)];
209 # Look for commas. This is easier done on the string
210 # representation, so do it now.
211 if ( $data =~ /^(.*)\,\037sp\037/ ) {
212 $ixprefix = $1;
213 $ixprefix =~ s/\037n $//; # Discard possible font change at end
214 $ixhasprefix{$ixentry} = $ixprefix;
215 if ( !$ixprefixes{$ixprefix} ) {
216 $ixcommafirst{$ixentry}++;
218 $ixprefixes{$ixprefix}++;
219 } else {
220 # A complete term can also be used as a prefix
221 $ixprefixes{$data}++;
223 } else {
224 push(@ptypes, $line);
225 push(@paras, [split(/\037/, $data)]);
228 close(PARAS);
231 # Convert an integer to a chosen base
233 sub int2base($$) {
234 my($i,$b) = @_;
235 my($s) = '';
236 my($n) = '';
237 my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
238 return '0' if ($i == 0);
239 if ( $i < 0 ) { $n = '-'; $i = -$i; }
240 while ( $i ) {
241 $s = substr($z,$i%$b,1) . $s;
242 $i = int($i/$b);
244 return $n.$s;
248 # Convert a string to a rendering array
250 sub string2array($)
252 my($s) = @_;
253 my(@a) = ();
255 $s =~ s/\B\-\-\B/$charcode{'emdash'}/g;
256 $s =~ s/\B\-\B/ $charcode{'endash'} /g;
258 while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
259 push(@a, [0,$1]);
260 $s = $2;
263 return @a;
267 # Take a crossreference name and generate the PostScript name for it.
269 # This hack produces a somewhat smaller PDF...
270 #%ps_xref_list = ();
271 #$ps_xref_next = 0;
272 #sub ps_xref($) {
273 # my($s) = @_;
274 # my $q = $ps_xref_list{$s};
275 # return $q if ( defined($ps_xref_list{$s}) );
276 # $q = 'X'.int2base($ps_xref_next++, 52);
277 # $ps_xref_list{$s} = $q;
278 # return $q;
281 # Somewhat bigger PDF, but one which obeys # URLs
282 sub ps_xref($) {
283 return @_[0];
287 # Flow lines according to a particular font set and width
289 # A "font set" is represented as an array containing
290 # arrays of pairs: [<size>, <metricref>]
292 # Each line is represented as:
293 # [ [type,first|last,aux,fontset,page,ypos,optional col],
294 # [rendering array] ]
296 # A space character may be "squeezed" by up to this much
297 # (as a fraction of the normal width of a space.)
299 $ps_space_squeeze = 0.00; # Min space width 100%
300 sub ps_flow_lines($$$@) {
301 my($wid, $fontset, $type, @data) = @_;
302 my($fonts) = $$fontset{fonts};
303 my($e);
304 my($w) = 0; # Width of current line
305 my($sw) = 0; # Width of current line due to spaces
306 my(@l) = (); # Current line
307 my(@ls) = (); # Accumulated output lines
308 my(@xd) = (); # Metadata that goes with subsequent text
309 my $hasmarker = 0; # Line has -6 marker
310 my $pastmarker = 0; # -6 marker found
312 # If there is a -6 marker anywhere in the paragraph,
313 # *each line* output needs to have a -6 marker
314 foreach $e ( @data ) {
315 $hasmarker = 1 if ( $$e[0] == -6 );
318 $w = 0;
319 foreach $e ( @data ) {
320 if ( $$e[0] < 0 ) {
321 # Type is metadata. Zero width.
322 if ( $$e[0] == -6 ) {
323 $pastmarker = 1;
325 if ( $$e[0] == -1 || $$e[0] == -6 ) {
326 # -1 (end anchor) or -6 (marker) goes with the preceeding
327 # text, otherwise with the subsequent text
328 push(@l, $e);
329 } else {
330 push(@xd, $e);
332 } else {
333 my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
334 \@NASMEncoding) *
335 ($fontset->{fonts}->[$$e[0]][0]/1000);
336 my $sp = $$e[1];
337 $sp =~ tr/[^ ]//d; # Delete nonspaces
338 my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
339 \@NASMEncoding) *
340 ($fontset->{fonts}->[$$e[0]][0]/1000);
342 if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
343 # Begin new line
344 # Search backwards for previous space chunk
345 my $lx = scalar(@l)-1;
346 my @rm = ();
347 while ( $lx >= 0 ) {
348 while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
349 # Skip metadata
350 $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
351 $lx--;
353 if ( $lx >= 0 ) {
354 if ( $l[$lx]->[1] eq ' ' ) {
355 splice(@l, $lx, 1);
356 @rm = splice(@l, $lx);
357 last; # Found place to break
358 } else {
359 $lx--;
364 # Now @l contains the stuff to remain on the old line
365 # If we broke the line inside a link, then split the link
366 # into two.
367 my $lkref = undef;
368 foreach my $lc ( @l ) {
369 if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
370 $lkref = $lc;
371 } elsif ( $$lc[0] == -1 ) {
372 undef $lkref;
376 if ( defined($lkref) ) {
377 push(@l, [-1,undef]); # Terminate old reference
378 unshift(@rm, $lkref); # Duplicate reference on new line
381 if ( $hasmarker ) {
382 if ( $pastmarker ) {
383 unshift(@rm,[-6,undef]); # New line starts with marker
384 } else {
385 push(@l,[-6,undef]); # Old line ends with marker
389 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
390 @l = @rm;
392 $w = $sw = 0;
393 # Compute the width of the remainder array
394 for my $le ( @l ) {
395 if ( $$le[0] >= 0 ) {
396 my $xew = ps_width($$le[1],
397 $fontset->{fonts}->[$$le[0]][1],
398 \@NASMEncoding) *
399 ($fontset->{fonts}->[$$le[0]][0]/1000);
400 my $xsp = $$le[1];
401 $xsp =~ tr/[^ ]//d; # Delete nonspaces
402 my $xsw = ps_width($xsp,
403 $fontset->{fonts}->[$$le[0]][1],
404 \@NASMEncoding) *
405 ($fontset->{fonts}->[$$le[0]][0]/1000);
406 $w += $xew; $sw += $xsw;
410 push(@l, @xd); # Accumulated metadata
411 @xd = ();
412 if ( $$e[1] ne '' ) {
413 push(@l, $e);
414 $w += $ew; $sw += $esw;
418 push(@l,@xd);
419 if ( scalar(@l) ) {
420 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
423 # Mark the first line as first and the last line as last
424 if ( scalar(@ls) ) {
425 $ls[0]->[0]->[1] |= 1; # First in para
426 $ls[-1]->[0]->[1] |= 2; # Last in para
428 return @ls;
432 # Once we have broken things into lines, having multiple chunks
433 # with the same font index is no longer meaningful. Merge
434 # adjacent chunks to keep down the size of the whole file.
436 sub ps_merge_chunks(@) {
437 my(@ci) = @_;
438 my($c, $lc);
439 my(@co, $eco);
441 undef $lc;
442 @co = ();
443 $eco = -1; # Index of the last entry in @co
444 foreach $c ( @ci ) {
445 if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
446 $co[$eco]->[1] .= $$c[1];
447 } else {
448 push(@co, $c); $eco++;
449 $lc = $$c[0];
452 return @co;
456 # Convert paragraphs to rendering arrays. Each
457 # element in the array contains (font, string),
458 # where font can be one of:
459 # -1 end link
460 # -2 begin crossref
461 # -3 begin weblink
462 # -4 index item anchor
463 # -5 crossref anchor
464 # -6 left/right marker (used in the index)
465 # -7 page link (used in the index)
466 # 0 normal
467 # 1 empatic (italic)
468 # 2 code (fixed spacing)
471 sub mkparaarray($@) {
472 my($ptype, @chunks) = @_;
474 my @para = ();
475 my $in_e = 0;
476 my $chunk;
478 if ( $ptype =~ /^code/ ) {
479 foreach $chunk ( @chunks ) {
480 push(@para, [2, $chunk]);
482 } else {
483 foreach $chunk ( @chunks ) {
484 my $type = substr($chunk,0,2);
485 my $text = substr($chunk,2);
487 if ( $type eq 'sp' ) {
488 push(@para, [$in_e?1:0, ' ']);
489 } elsif ( $type eq 'da' ) {
490 push(@para, [$in_e?1:0, $charcode{'endash'}]);
491 } elsif ( $type eq 'n ' ) {
492 push(@para, [0, $text]);
493 $in_e = 0;
494 } elsif ( $type =~ '^e' ) {
495 push(@para, [1, $text]);
496 $in_e = ($type eq 'es' || $type eq 'e ');
497 } elsif ( $type eq 'c ' ) {
498 push(@para, [2, $text]);
499 $in_e = 0;
500 } elsif ( $type eq 'x ' ) {
501 push(@para, [-2, ps_xref($text)]);
502 } elsif ( $type eq 'xe' ) {
503 push(@para, [-1, undef]);
504 } elsif ( $type eq 'wc' || $type eq 'w ' ) {
505 $text =~ /\<(.*)\>(.*)$/;
506 my $link = $1; $text = $2;
507 push(@para, [-3, $link]);
508 push(@para, [($type eq 'wc') ? 2:0, $text]);
509 push(@para, [-1, undef]);
510 $in_e = 0;
511 } elsif ( $type eq 'i ' ) {
512 push(@para, [-4, $text]);
513 } else {
514 die "Unexpected paragraph chunk: $chunk";
518 return @para;
521 $npara = scalar(@paras);
522 for ( $i = 0 ; $i < $npara ; $i++ ) {
523 $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
527 # This converts a rendering array to a simple string
529 sub ps_arraytostr(@) {
530 my $s = '';
531 my $c;
532 foreach $c ( @_ ) {
533 $s .= $$c[1] if ( $$c[0] >= 0 );
535 return $s;
539 # This generates a duplicate of a paragraph
541 sub ps_dup_para(@) {
542 my(@i) = @_;
543 my(@o) = ();
544 my($c);
546 foreach $c ( @i ) {
547 my @cc = @{$c};
548 push(@o, [@cc]);
550 return @o;
554 # This generates a duplicate of a paragraph, stripping anchor
555 # tags (-4 and -5)
557 sub ps_dup_para_noanchor(@) {
558 my(@i) = @_;
559 my(@o) = ();
560 my($c);
562 foreach $c ( @i ) {
563 my @cc = @{$c};
564 push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
566 return @o;
570 # Scan for header paragraphs and fix up their contents;
571 # also generate table of contents and PDF bookmarks.
573 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
574 @tocptypes = ('chap');
575 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
576 %bookref = ();
577 for ( $i = 0 ; $i < $npara ; $i++ ) {
578 my $xtype = $ptypes[$i];
579 my $ptype = substr($xtype,0,4);
580 my $str;
581 my $book;
583 if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
584 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
585 die "Bad para";
587 my $secn = $1;
588 my $sech = $2;
589 my $xref = ps_xref($sech);
590 my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
592 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
593 push(@bookmarks, $book);
594 $bookref{$secn} = $book;
596 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
597 push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
599 unshift(@{$paras[$i]},
600 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
601 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
602 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
603 die "Bad para";
605 my $secn = $1;
606 my $sech = $2;
607 my $xref = ps_xref($sech);
608 my $pref;
609 $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
611 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
612 push(@bookmarks, $book);
613 $bookref{$secn} = $book;
614 $bookref{$pref}->[1]--; # Adjust count for parent node
616 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
617 push(@tocptypes,
618 (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
620 unshift(@{$paras[$i]}, [-5, $xref]);
625 # Add TOC to beginning of paragraph list
627 unshift(@paras, @tocparas); undef @tocparas;
628 unshift(@ptypes, @tocptypes); undef @tocptypes;
631 # Add copyright notice to the beginning
633 @copyright_page =
634 ([[0, $charcode{'copyright'}],
635 [0, ' '], [0, $metadata{'year'}],
636 [0, ' '], string2array($metadata{'author'}),
637 [0, ' '], string2array($metadata{'copyright_tail'})],
638 [string2array($metadata{'license'})],
639 [string2array($metadata{'auxinfo'})]);
641 unshift(@paras, @copyright_page);
642 unshift(@ptypes, ('norm') x scalar(@copyright_page));
644 $npara = scalar(@paras);
647 # No lines generated, yet.
649 @pslines = ();
652 # Line Auxilliary Information Types
654 $AuxStr = 1; # String
655 $AuxPage = 2; # Page number (from xref)
656 $AuxPageStr = 3; # Page number as a PostScript string
657 $AuxXRef = 4; # Cross reference as a name
658 $AuxNum = 5; # Number
661 # Break or convert paragraphs into lines, and push them
662 # onto the @pslines array.
664 sub ps_break_lines($$) {
665 my ($paras,$ptypes) = @_;
667 my $linewidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
668 my $bullwidth = $linewidth-$psconf{bulladj};
669 my $indxwidth = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
670 -$psconf{idxspace};
672 my $npara = scalar(@{$paras});
673 my $i;
675 for ( $i = 0 ; $i < $npara ; $i++ ) {
676 my $xtype = $ptypes->[$i];
677 my $ptype = substr($xtype,0,4);
678 my @data = @{$paras->[$i]};
679 my @ls = ();
680 if ( $ptype eq 'code' ) {
681 my $p;
682 # Code paragraph; each chunk is a line
683 foreach $p ( @data ) {
684 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
686 $ls[0]->[0]->[1] |= 1; # First in para
687 $ls[-1]->[0]->[1] |= 2; # Last in para
688 } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
689 # Chapters are flowed normally, but in an unusual font
690 @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
691 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
692 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
693 die "Bad para";
695 my $secn = $1;
696 my $sech = $2;
697 my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
698 @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
699 # We need the heading number as auxillary data
700 $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
701 } elsif ( $ptype eq 'norm' ) {
702 @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
703 } elsif ( $ptype eq 'bull' ) {
704 @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
705 } elsif ( $ptype =~ /^toc/ ) {
706 unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
707 die "Bad para";
709 my $xref = $1;
710 my $refname = $2.' ';
711 my $ntoc = substr($ptype,3,1)+0;
712 my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
713 \@NASMEncoding) *
714 ($BodyFont{fonts}->[0][0]/1000);
716 @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
717 $psconf{tocpnz}-$refwidth,
718 \%BodyFont, $ptype, @data);
720 # Auxilliary data: for the first line, the cross reference symbol
721 # and the reference name; for all lines but the first, the
722 # reference width; and for the last line, the page number
723 # as a string.
724 my $nl = scalar(@ls);
725 $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
726 for ( $j = 1 ; $j < $nl ; $j++ ) {
727 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
729 push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
730 } elsif ( $ptype =~ /^idx/ ) {
731 my $lvl = substr($ptype,3,1)+0;
733 @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
734 \%BodyFont, $ptype, @data);
735 } else {
736 die "Unknown para type: $ptype";
738 # Merge adjacent identical chunks
739 foreach $l ( @ls ) {
740 @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
742 push(@pslines,@ls);
746 # Break the main body text into lines.
747 ps_break_lines(\@paras, \@ptypes);
750 # Break lines in to pages
753 # Where to start on page 2, the copyright page
754 $curpage = 2; # Start on page 2
755 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
756 $psconf{startcopyright};
757 undef $columnstart; # Not outputting columnar text
758 undef $curcolumn; # Current column
759 $nlines = scalar(@pslines);
762 # This formats lines inside the global @pslines array into pages,
763 # updating the page and y-coordinate entries. Start at the
764 # $startline position in @pslines and go to but not including
765 # $endline. The global variables $curpage, $curypos, $columnstart
766 # and $curcolumn are updated appropriately.
768 sub ps_break_pages($$) {
769 my($startline, $endline) = @_;
771 # Paragraph types which should never be broken
772 my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
773 # Paragraph types which are heading (meaning they should not be broken
774 # immediately after)
775 my $nobreakafter = "^(chap|appn|head|subh)\$";
776 # Paragraph types which should never be broken *before*
777 my $nobreakbefore = "^idx[1-9]\$";
778 # Paragraph types which are set in columnar format
779 my $columnregexp = "^idx.\$";
781 my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
783 my $i;
785 for ( $i = $startline ; $i < $endline ; $i++ ) {
786 my $linfo = $pslines[$i]->[0];
787 if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
788 && ($$linfo[1] & 1) ) {
789 # First line of a new chapter heading. Start a new page.
790 undef $columnstart;
791 $curpage++ if ( $curypos > 0 || defined($columnstart) );
792 $curypos = $chapstart;
793 } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
794 undef $columnstart;
795 $curpage++;
796 $curypos = 0;
799 if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
800 $columnstart = $curypos;
801 $curcolumn = 0;
804 # Adjust position by the appropriate leading
805 $curypos += $$linfo[3]->{leading};
807 # Record the page and y-position
808 $$linfo[4] = $curpage;
809 $$linfo[5] = $curypos;
810 $$linfo[6] = $curcolumn if ( defined($columnstart) );
812 if ( $curypos > $upageheight ) {
813 # We need to break the page before this line.
814 my $broken = 0; # No place found yet
815 while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
816 my $linfo = $pslines[$i]->[0];
817 my $pinfo = $pslines[$i-1]->[0];
819 if ( $$linfo[1] == 2 ) {
820 # This would be an orphan, don't break.
821 } elsif ( $$linfo[1] & 1 ) {
822 # Sole line or start of paragraph. Break unless
823 # the previous line was part of a heading.
824 $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
825 $$linfo[0] !~ /$nobreakbefore/o );
826 } else {
827 # Middle of paragraph. Break unless we're in a
828 # no-break paragraph, or the previous line would
829 # end up being a widow.
830 $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
831 $$pinfo[1] != 1 );
833 $i--;
835 die "Nowhere to break page $curpage\n" if ( !$broken );
836 # Now $i should point to line immediately before the break, i.e.
837 # the next paragraph should be the first on the new page
838 if ( defined($columnstart) &&
839 ++$curcolumn < $psconf{idxcolumns} ) {
840 # We're actually breaking text into columns, not pages
841 $curypos = $columnstart;
842 } else {
843 undef $columnstart;
844 $curpage++;
845 $curypos = 0;
847 next;
850 # Add end of paragraph skip
851 if ( $$linfo[1] & 2 ) {
852 $curypos += $skiparray{$$linfo[0]};
857 ps_break_pages(0,$nlines); # Break the main text body into pages
860 # Find the page number of all the indices
862 %ps_xref_page = (); # Crossref anchor pages
863 %ps_index_pages = (); # Index item pages
864 $nlines = scalar(@pslines);
865 for ( $i = 0 ; $i < $nlines ; $i++ ) {
866 my $linfo = $pslines[$i]->[0];
867 foreach my $c ( @{$pslines[$i]->[1]} ) {
868 if ( $$c[0] == -4 ) {
869 if ( !defined($ps_index_pages{$$c[1]}) ) {
870 $ps_index_pages{$$c[1]} = [];
871 } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
872 # Pages are emitted in order; if this is a duplicated
873 # entry it will be the last one
874 next; # Duplicate
876 push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
877 } elsif ( $$c[0] == -5 ) {
878 $ps_xref_page{$$c[1]} = $$linfo[4];
884 # Emit index paragraphs
886 $startofindex = scalar(@pslines);
887 @ixparas = ([[-5,'index'],[0,'Index']]);
888 @ixptypes = ('chap');
890 foreach $k ( @ixentries ) {
891 my $n,$i;
892 my $ixptype = 'idx0';
893 my $prefix = $ixhasprefix{$k};
894 my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
895 my $commapos = undef;
897 if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
898 # This entry has a "hanging comma"
899 for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
900 if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
901 $ixpara[$i+1]->[1] eq ' ' ) {
902 $commapos = $i;
903 last;
907 if ( defined($commapos) ) {
908 if ( $ixcommafirst{$k} ) {
909 # This is the first entry; generate the
910 # "hanging comma" entry
911 my @precomma = splice(@ixpara,0,$commapos);
912 if ( $ixpara[0]->[1] eq ',' ) {
913 shift(@ixpara); # Discard lone comma
914 } else {
915 # Discard attached comma
916 $ixpara[0]->[1] =~ s/\,$//;
917 push(@precomma,shift(@ixpara));
919 push(@precomma, [-6,undef]);
920 push(@ixparas, [@precomma]);
921 push(@ixptypes, $ixptype);
922 shift(@ixpara); # Remove space
923 } else {
924 splice(@ixpara,0,$commapos+2);
926 $ixptype = 'idx1';
929 push(@ixpara, [-6,undef]); # Left/right marker
930 $i = 1; $n = scalar(@{$ps_index_pages{$k}});
931 foreach $p ( @{$ps_index_pages{$k}} ) {
932 if ( $i++ == $n ) {
933 push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
934 } else {
935 push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
939 push(@ixparas, [@ixpara]);
940 push(@ixptypes, $ixptype);
944 # Flow index paragraphs into lines
946 ps_break_lines(\@ixparas, \@ixptypes);
949 # Format index into pages
951 $nlines = scalar(@pslines);
952 ps_break_pages($startofindex, $nlines);
955 # Push index onto bookmark list
957 push(@bookmarks, ['index', 0, 'Index']);
959 # Get the list of fonts used
960 %ps_all_fonts = ();
961 foreach $fset ( @AllFonts ) {
962 foreach $font ( @{$fset->{fonts}} ) {
963 $ps_all_fonts{$font->[1]->{name}}++;
967 # Emit the PostScript DSC header
968 print "%!PS-Adobe-3.0\n";
969 print "%%Pages: $curpage\n";
970 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
971 print "%%Creator: (NASM psflow.pl)\n";
972 print "%%DocumentData: Clean7Bit\n";
973 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
974 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
975 print "%%Orientation: Portrait\n";
976 print "%%PageOrder: Ascend\n";
977 print "%%EndComments\n";
978 print "%%BeginProlog\n";
980 # Emit the configurables as PostScript tokens
981 foreach $c ( keys(%psconf) ) {
982 print "/$c ", $psconf{$c}, " def\n";
984 foreach $c ( keys(%psbool) ) {
985 print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
988 # Emit custom encoding vector
989 $zstr = '/NASMEncoding [ ';
990 foreach $c ( @NASMEncoding ) {
991 my $z = '/'.(defined($c)?$c:'.notdef ').' ';
992 if ( length($zstr)+length($z) > 72 ) {
993 print $zstr,"\n";
994 $zstr = ' ';
996 $zstr .= $z;
998 print $zstr, "] def\n";
1000 # Font recoding routine
1001 # newname fontname --
1002 print "/nasmenc {\n";
1003 print " findfont dup length dict begin\n";
1004 print " { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
1005 print " /Encoding NASMEncoding def\n";
1006 print " currentdict\n";
1007 print " end\n";
1008 print " definefont pop\n";
1009 print "} def\n";
1011 # Emit fontset definitions
1012 foreach $font ( keys(%ps_all_fonts) ) {
1013 print '/',$font,'-NASM /',$font," nasmenc\n";
1016 foreach $fset ( @AllFonts ) {
1017 my $i = 0;
1018 my @zfonts = ();
1019 foreach $font ( @{$fset->{fonts}} ) {
1020 print '/', $fset->{name}, $i, ' ',
1021 '/', $font->[1]->{name}, '-NASM findfont ',
1022 $font->[0], " scalefont def\n";
1023 push(@zfonts, $fset->{name}.$i);
1024 $i++;
1026 print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
1029 # This is used by the bullet-paragraph PostScript methods
1030 print "/bullet [",ps_string($charcode{'bullet'}),"] def\n";
1032 # Emit the canned PostScript prologue
1033 open(PSHEAD, '<', $headps)
1034 or die "$0: cannot open: $headps: $!\n";
1035 while ( defined($line = <PSHEAD>) ) {
1036 print $line;
1038 close(PSHEAD);
1039 print "%%EndProlog\n";
1041 # Generate a PostScript string
1042 sub ps_string($) {
1043 my ($s) = @_;
1044 my ($i,$c);
1045 my ($o) = '(';
1046 my ($l) = length($s);
1047 for ( $i = 0 ; $i < $l ; $i++ ) {
1048 $c = substr($s,$i,1);
1049 if ( ord($c) < 32 || ord($c) > 126 ) {
1050 $o .= sprintf("\\%03o", ord($c));
1051 } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1052 $o .= "\\".$c;
1053 } else {
1054 $o .= $c;
1057 return $o.')';
1060 # Generate PDF bookmarks
1061 print "%%BeginSetup\n";
1062 foreach $b ( @bookmarks ) {
1063 print '[/Title ', ps_string($b->[2]), "\n";
1064 print '/Count ', $b->[1], ' ' if ( $b->[1] );
1065 print '/Dest /',$b->[0]," /OUT pdfmark\n";
1068 # Ask the PostScript interpreter for the proper size media
1069 print "setpagesize\n";
1070 print "%%EndSetup\n";
1072 # Start a PostScript page
1073 sub ps_start_page() {
1074 $ps_page++;
1075 print "%%Page: $ps_page $ps_page\n";
1076 print "%%BeginPageSetup\n";
1077 print "save\n";
1078 print "%%EndPageSetup\n";
1079 print '/', $ps_page, " pa\n";
1082 # End a PostScript page
1083 sub ps_end_page($) {
1084 my($pn) = @_;
1085 if ( $pn ) {
1086 print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1088 print "restore showpage\n";
1091 $ps_page = 0;
1093 # Title page
1094 ps_start_page();
1095 $title = $metadata{'title'} || '';
1096 $title =~ s/ \- / $charcode{'emdash'} /;
1098 $subtitle = $metadata{'subtitle'} || '';
1099 $subtitle =~ s/ \- / $charcode{'emdash'} /;
1101 # Print title
1102 print "/ti ", ps_string($title), " def\n";
1103 print "/sti ", ps_string($subtitle), " def\n";
1104 print "lmarg pageheight 2 mul 3 div moveto\n";
1105 print "tfont0 setfont\n";
1106 print "/title linkdest ti show\n";
1107 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1108 print "0 setlinecap 3 setlinewidth\n";
1109 print "pagewidth lmarg sub rmarg sub 0 rlineto currentpoint stroke moveto\n";
1110 print "hfont1 setfont sti stringwidth pop neg ",
1111 -$HeadFont{leading}, " rmoveto\n";
1112 print "sti show\n";
1114 # Print logo, if there is one
1115 # FIX: To be 100% correct, this should look for DocumentNeeded*
1116 # and DocumentFonts in the header of the EPSF and add those to the
1117 # global header.
1118 if ( defined($metadata{epslogo}) &&
1119 sysopen(EPS, $metadata{epslogo}, O_RDONLY) ) {
1120 my @eps = ();
1121 my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1122 my $line;
1123 my $scale = 1;
1124 my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1125 my $maxheight = $psconf{pageheight}/3-40;
1126 my $width, $height;
1127 my $x, $y;
1129 while ( defined($line = <EPS>) ) {
1130 last if ( $line =~ /^%%EOF/ );
1131 if ( !defined($bbllx) &&
1132 $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1133 $bbllx = $1+0; $bblly = $2+0;
1134 $bburx = $3+0; $bbury = $4+0;
1136 push(@eps,$line);
1138 close(EPS);
1140 if ( defined($bbllx) ) {
1141 $width = $bburx-$bbllx;
1142 $height = $bbury-$bblly;
1144 if ( $width > $maxwidth ) {
1145 $scale = $maxwidth/$width;
1147 if ( $height*$scale > $maxheight ) {
1148 $scale = $maxheight/$height;
1151 $x = ($psconf{pagewidth}-$width*$scale)/2;
1152 $y = ($psconf{pageheight}-$height*$scale)/2;
1154 if ( defined($metadata{logoxadj}) ) {
1155 $x += $metadata{logoxadj};
1157 if ( defined($metadata{logoyadj}) ) {
1158 $y += $metadata{logoyadj};
1161 print "BeginEPSF\n";
1162 print $x, ' ', $y, " translate\n";
1163 print $scale, " dup scale\n" unless ( $scale == 1 );
1164 print -$bbllx, ' ', -$bblly, " translate\n";
1165 print "$bbllx $bblly moveto\n";
1166 print "$bburx $bblly lineto\n";
1167 print "$bburx $bbury lineto\n";
1168 print "$bbllx $bbury lineto\n";
1169 print "$bbllx $bblly lineto clip newpath\n";
1170 print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1171 print @eps;
1172 print "%%EndDocument\n";
1173 print "EndEPSF\n";
1176 ps_end_page(0);
1178 # Emit the rest of the document (page 2 and on)
1179 $curpage = 2;
1180 ps_start_page();
1181 foreach $line ( @pslines ) {
1182 my $linfo = $line->[0];
1184 if ( $$linfo[4] != $curpage ) {
1185 ps_end_page($curpage > 2);
1186 ps_start_page();
1187 $curpage = $$linfo[4];
1190 print '[';
1191 my $curfont = 0;
1192 foreach my $c ( @{$line->[1]} ) {
1193 if ( $$c[0] >= 0 ) {
1194 if ( $curfont != $$c[0] ) {
1195 print ($curfont = $$c[0]);
1197 print ps_string($$c[1]);
1198 } elsif ( $$c[0] == -1 ) {
1199 print '{el}'; # End link
1200 } elsif ( $$c[0] == -2 ) {
1201 print '{/',$$c[1],' xl}'; # xref link
1202 } elsif ( $$c[0] == -3 ) {
1203 print '{',ps_string($$c[1]),'wl}'; # web link
1204 } elsif ( $$c[0] == -4 ) {
1205 # Index anchor -- ignore
1206 } elsif ( $$c[0] == -5 ) {
1207 print '{/',$$c[1],' xa}'; #xref anchor
1208 } elsif ( $$c[0] == -6 ) {
1209 print ']['; # Start a new array
1210 $curfont = 0;
1211 } elsif ( $$c[0] == -7 ) {
1212 print '{/',$$c[1],' pl}'; # page link
1213 } else {
1214 die "Unknown annotation";
1217 print ']';
1218 if ( defined($$linfo[2]) ) {
1219 foreach my $x ( @{$$linfo[2]} ) {
1220 if ( $$x[0] == $AuxStr ) {
1221 print ps_string($$x[1]);
1222 } elsif ( $$x[0] == $AuxPage ) {
1223 print $ps_xref_page{$$x[1]},' ';
1224 } elsif ( $$x[0] == $AuxPageStr ) {
1225 print ps_string($ps_xref_page{$$x[1]});
1226 } elsif ( $$x[0] == $AuxXRef ) {
1227 print '/',ps_xref($$x[1]),' ';
1228 } elsif ( $$x[0] == $AuxNum ) {
1229 print $$x[1],' ';
1230 } else {
1231 die "Unknown auxilliary data type";
1235 print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1236 print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1237 print ' ', $$linfo[0].$$linfo[1], "\n";
1240 ps_end_page(1);
1241 print "%%EOF\n";