Add support for UTF-16BE and UTF-32BE
[nasm/sigaren-mirror.git] / doc / genps.pl
blob222db8aabe08f9bcd3e32806668f2ba0d1c40a8b
1 #!/usr/bin/perl
2 ## --------------------------------------------------------------------------
3 ##
4 ## Copyright 1996-2009 The NASM Authors - All Rights Reserved
5 ## See the file AUTHORS included with the NASM distribution for
6 ## the specific copyright holders.
7 ##
8 ## Redistribution and use in source and binary forms, with or without
9 ## modification, are permitted provided that the following
10 ## conditions are met:
12 ## * Redistributions of source code must retain the above copyright
13 ## notice, this list of conditions and the following disclaimer.
14 ## * Redistributions in binary form must reproduce the above
15 ## copyright notice, this list of conditions and the following
16 ## disclaimer in the documentation and/or other materials provided
17 ## with the distribution.
18 ##
19 ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20 ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
24 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31 ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 ## --------------------------------------------------------------------------
36 # Format the documentation as PostScript
39 use Env;
40 use lib $srcdir;
42 require 'psfonts.ph'; # The fonts we want to use
43 require 'pswidth.ph'; # PostScript string width
45 use Fcntl;
48 # PostScript configurables; these values are also available to the
49 # PostScript code itself
51 %psconf = (
52 pagewidth => 595, # Page width in PostScript points
53 pageheight => 792, # Page height in PostScript points
54 lmarg => 100, # Left margin in PostScript points
55 rmarg => 50, # Right margin in PostScript points
56 topmarg => 100, # Top margin in PostScript points
57 botmarg => 100, # Bottom margin in PostScript points
58 plmarg => 50, # Page number position relative to left margin
59 prmarg => 0, # Page number position relative to right margin
60 pymarg => 50, # Page number position relative to bot margin
61 startcopyright => 75, # How much above the bottom margin is the
62 # copyright notice stuff
63 bulladj => 12, # How much to indent a bullet paragraph
64 tocind => 12, # TOC indentation per level
65 tocpnz => 24, # Width of TOC page number only zone
66 tocdots => 8, # Spacing between TOC dots
67 idxspace => 24, # Minimum space between index title and pg#
68 idxindent => 24, # How much to indent a subindex entry
69 idxgutter => 24, # Space between index columns
70 idxcolumns => 2, # Number of index columns
73 %psbool = (
74 colorlinks => 0, # Set links in blue rather than black
77 # Known paper sizes
78 %papersizes = (
79 'a5' => [421, 595], # ISO half paper size
80 'b5' => [501, 709], # ISO small paper size
81 'a4' => [595, 842], # ISO standard paper size
82 'letter' => [612, 792], # US common paper size
83 'pa4' => [595, 792], # Compromise ("portable a4")
84 'b4' => [709,1002], # ISO intermediate paper size
85 'legal' => [612,1008], # US intermediate paper size
86 'a3' => [842,1190], # ISO double paper size
87 '11x17' => [792,1224], # US double paper size
91 # Parse the command line
93 undef $input;
94 while ( $arg = shift(@ARGV) ) {
95 if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
96 $parm = $2;
97 $true = ($1 eq '') ? 1 : 0;
98 if ( $true && defined($papersizes{$parm}) ) {
99 $psconf{pagewidth} = $papersizes{$parm}->[0];
100 $psconf{pageheight} = $papersizes{$parm}->[1];
101 } elsif ( defined($psbool{$parm}) ) {
102 $psbool{$parm} = $true;
103 } elsif ( $true && defined($psconf{$parm}) ) {
104 $psconf{$parm} = shift(@ARGV);
105 } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
106 $metadata{$parm} = shift(@ARGV);
107 } else {
108 die "$0: Unknown option: $arg\n";
110 } else {
111 $input = $arg;
116 # Document formatting parameters
118 $paraskip = 6; # Space between paragraphs
119 $chapstart = 30; # Space before a chapter heading
120 $chapskip = 24; # Space after a chapter heading
121 $tocskip = 6; # Space between TOC entries
123 # Configure post-paragraph skips for each kind of paragraph
124 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
125 'head' => $paraskip, 'subh' => $paraskip,
126 'norm' => $paraskip, 'bull' => $paraskip,
127 'code' => $paraskip, 'toc0' => $tocskip,
128 'toc1' => $tocskip, 'toc2' => $tocskip);
130 # Custom encoding vector. This is basically the same as
131 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
132 # but with the "naked" accents at \200-\237 moved to the \000-\037
133 # range (ASCII control characters), and a few extra characters thrown
134 # in. It is basically a modified Windows 1252 codepage, minus, for
135 # now, the euro sign (\200 is reserved for euro.)
137 @NASMEncoding =
139 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
140 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
141 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
142 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
143 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
144 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
145 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
146 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
147 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
148 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
149 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
150 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
151 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
152 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
153 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
154 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
155 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
156 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
157 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
158 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
159 'scaron', 'guilsignlright', 'oe', undef, 'zcaron', 'Ydieresis',
160 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
161 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
162 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
163 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
164 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
165 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
166 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
167 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
168 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
169 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
170 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
171 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
172 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
173 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
174 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
175 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
176 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
177 'thorn', 'ydieresis'
180 # Name-to-byte lookup hash
181 %charcode = ();
182 for ( $i = 0 ; $i < 256 ; $i++ ) {
183 $charcode{$NASMEncoding[$i]} = chr($i);
187 # First, format the stuff coming from the front end into
188 # a cleaner representation
190 if ( defined($input) ) {
191 sysopen(PARAS, $input, O_RDONLY) or
192 die "$0: cannot open $input: $!\n";
193 } else {
194 open(PARAS, "<&STDIN") or die "$0: $!\n";
196 while ( defined($line = <PARAS>) ) {
197 chomp $line;
198 $data = <PARAS>;
199 chomp $data;
200 if ( $line =~ /^meta :(.*)$/ ) {
201 $metakey = $1;
202 $metadata{$metakey} = $data;
203 } elsif ( $line =~ /^indx :(.*)$/ ) {
204 $ixentry = $1;
205 push(@ixentries, $ixentry);
206 $ixterms{$ixentry} = [split(/\037/, $data)];
207 # Look for commas. This is easier done on the string
208 # representation, so do it now.
209 if ( $data =~ /^(.*)\,\037sp\037/ ) {
210 $ixprefix = $1;
211 $ixprefix =~ s/\037n $//; # Discard possible font change at end
212 $ixhasprefix{$ixentry} = $ixprefix;
213 if ( !$ixprefixes{$ixprefix} ) {
214 $ixcommafirst{$ixentry}++;
216 $ixprefixes{$ixprefix}++;
217 } else {
218 # A complete term can also be used as a prefix
219 $ixprefixes{$data}++;
221 } else {
222 push(@ptypes, $line);
223 push(@paras, [split(/\037/, $data)]);
226 close(PARAS);
229 # Convert an integer to a chosen base
231 sub int2base($$) {
232 my($i,$b) = @_;
233 my($s) = '';
234 my($n) = '';
235 my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
236 return '0' if ($i == 0);
237 if ( $i < 0 ) { $n = '-'; $i = -$i; }
238 while ( $i ) {
239 $s = substr($z,$i%$b,1) . $s;
240 $i = int($i/$b);
242 return $n.$s;
246 # Convert a string to a rendering array
248 sub string2array($)
250 my($s) = @_;
251 my(@a) = ();
253 $s =~ s/\B\-\-\B/$charcode{'emdash'}/g;
254 $s =~ s/\B\-\B/ $charcode{'endash'} /g;
256 while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
257 push(@a, [0,$1]);
258 $s = $2;
261 return @a;
265 # Take a crossreference name and generate the PostScript name for it.
267 # This hack produces a somewhat smaller PDF...
268 #%ps_xref_list = ();
269 #$ps_xref_next = 0;
270 #sub ps_xref($) {
271 # my($s) = @_;
272 # my $q = $ps_xref_list{$s};
273 # return $q if ( defined($ps_xref_list{$s}) );
274 # $q = 'X'.int2base($ps_xref_next++, 52);
275 # $ps_xref_list{$s} = $q;
276 # return $q;
279 # Somewhat bigger PDF, but one which obeys # URLs
280 sub ps_xref($) {
281 return @_[0];
285 # Flow lines according to a particular font set and width
287 # A "font set" is represented as an array containing
288 # arrays of pairs: [<size>, <metricref>]
290 # Each line is represented as:
291 # [ [type,first|last,aux,fontset,page,ypos,optional col],
292 # [rendering array] ]
294 # A space character may be "squeezed" by up to this much
295 # (as a fraction of the normal width of a space.)
297 $ps_space_squeeze = 0.00; # Min space width 100%
298 sub ps_flow_lines($$$@) {
299 my($wid, $fontset, $type, @data) = @_;
300 my($fonts) = $$fontset{fonts};
301 my($e);
302 my($w) = 0; # Width of current line
303 my($sw) = 0; # Width of current line due to spaces
304 my(@l) = (); # Current line
305 my(@ls) = (); # Accumulated output lines
306 my(@xd) = (); # Metadata that goes with subsequent text
307 my $hasmarker = 0; # Line has -6 marker
308 my $pastmarker = 0; # -6 marker found
310 # If there is a -6 marker anywhere in the paragraph,
311 # *each line* output needs to have a -6 marker
312 foreach $e ( @data ) {
313 $hasmarker = 1 if ( $$e[0] == -6 );
316 $w = 0;
317 foreach $e ( @data ) {
318 if ( $$e[0] < 0 ) {
319 # Type is metadata. Zero width.
320 if ( $$e[0] == -6 ) {
321 $pastmarker = 1;
323 if ( $$e[0] == -1 || $$e[0] == -6 ) {
324 # -1 (end anchor) or -6 (marker) goes with the preceeding
325 # text, otherwise with the subsequent text
326 push(@l, $e);
327 } else {
328 push(@xd, $e);
330 } else {
331 my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
332 \@NASMEncoding) *
333 ($fontset->{fonts}->[$$e[0]][0]/1000);
334 my $sp = $$e[1];
335 $sp =~ tr/[^ ]//d; # Delete nonspaces
336 my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
337 \@NASMEncoding) *
338 ($fontset->{fonts}->[$$e[0]][0]/1000);
340 if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
341 # Begin new line
342 # Search backwards for previous space chunk
343 my $lx = scalar(@l)-1;
344 my @rm = ();
345 while ( $lx >= 0 ) {
346 while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
347 # Skip metadata
348 $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
349 $lx--;
351 if ( $lx >= 0 ) {
352 if ( $l[$lx]->[1] eq ' ' ) {
353 splice(@l, $lx, 1);
354 @rm = splice(@l, $lx);
355 last; # Found place to break
356 } else {
357 $lx--;
362 # Now @l contains the stuff to remain on the old line
363 # If we broke the line inside a link, then split the link
364 # into two.
365 my $lkref = undef;
366 foreach my $lc ( @l ) {
367 if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
368 $lkref = $lc;
369 } elsif ( $$lc[0] == -1 ) {
370 undef $lkref;
374 if ( defined($lkref) ) {
375 push(@l, [-1,undef]); # Terminate old reference
376 unshift(@rm, $lkref); # Duplicate reference on new line
379 if ( $hasmarker ) {
380 if ( $pastmarker ) {
381 unshift(@rm,[-6,undef]); # New line starts with marker
382 } else {
383 push(@l,[-6,undef]); # Old line ends with marker
387 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
388 @l = @rm;
390 $w = $sw = 0;
391 # Compute the width of the remainder array
392 for my $le ( @l ) {
393 if ( $$le[0] >= 0 ) {
394 my $xew = ps_width($$le[1],
395 $fontset->{fonts}->[$$le[0]][1],
396 \@NASMEncoding) *
397 ($fontset->{fonts}->[$$le[0]][0]/1000);
398 my $xsp = $$le[1];
399 $xsp =~ tr/[^ ]//d; # Delete nonspaces
400 my $xsw = ps_width($xsp,
401 $fontset->{fonts}->[$$le[0]][1],
402 \@NASMEncoding) *
403 ($fontset->{fonts}->[$$le[0]][0]/1000);
404 $w += $xew; $sw += $xsw;
408 push(@l, @xd); # Accumulated metadata
409 @xd = ();
410 if ( $$e[1] ne '' ) {
411 push(@l, $e);
412 $w += $ew; $sw += $esw;
416 push(@l,@xd);
417 if ( scalar(@l) ) {
418 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
421 # Mark the first line as first and the last line as last
422 if ( scalar(@ls) ) {
423 $ls[0]->[0]->[1] |= 1; # First in para
424 $ls[-1]->[0]->[1] |= 2; # Last in para
426 return @ls;
430 # Once we have broken things into lines, having multiple chunks
431 # with the same font index is no longer meaningful. Merge
432 # adjacent chunks to keep down the size of the whole file.
434 sub ps_merge_chunks(@) {
435 my(@ci) = @_;
436 my($c, $lc);
437 my(@co, $eco);
439 undef $lc;
440 @co = ();
441 $eco = -1; # Index of the last entry in @co
442 foreach $c ( @ci ) {
443 if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
444 $co[$eco]->[1] .= $$c[1];
445 } else {
446 push(@co, $c); $eco++;
447 $lc = $$c[0];
450 return @co;
454 # Convert paragraphs to rendering arrays. Each
455 # element in the array contains (font, string),
456 # where font can be one of:
457 # -1 end link
458 # -2 begin crossref
459 # -3 begin weblink
460 # -4 index item anchor
461 # -5 crossref anchor
462 # -6 left/right marker (used in the index)
463 # -7 page link (used in the index)
464 # 0 normal
465 # 1 empatic (italic)
466 # 2 code (fixed spacing)
469 sub mkparaarray($@) {
470 my($ptype, @chunks) = @_;
472 my @para = ();
473 my $in_e = 0;
474 my $chunk;
476 if ( $ptype =~ /^code/ ) {
477 foreach $chunk ( @chunks ) {
478 push(@para, [2, $chunk]);
480 } else {
481 foreach $chunk ( @chunks ) {
482 my $type = substr($chunk,0,2);
483 my $text = substr($chunk,2);
485 if ( $type eq 'sp' ) {
486 push(@para, [$in_e?1:0, ' ']);
487 } elsif ( $type eq 'da' ) {
488 push(@para, [$in_e?1:0, $charcode{'endash'}]);
489 } elsif ( $type eq 'n ' ) {
490 push(@para, [0, $text]);
491 $in_e = 0;
492 } elsif ( $type =~ '^e' ) {
493 push(@para, [1, $text]);
494 $in_e = ($type eq 'es' || $type eq 'e ');
495 } elsif ( $type eq 'c ' ) {
496 push(@para, [2, $text]);
497 $in_e = 0;
498 } elsif ( $type eq 'x ' ) {
499 push(@para, [-2, ps_xref($text)]);
500 } elsif ( $type eq 'xe' ) {
501 push(@para, [-1, undef]);
502 } elsif ( $type eq 'wc' || $type eq 'w ' ) {
503 $text =~ /\<(.*)\>(.*)$/;
504 my $link = $1; $text = $2;
505 push(@para, [-3, $link]);
506 push(@para, [($type eq 'wc') ? 2:0, $text]);
507 push(@para, [-1, undef]);
508 $in_e = 0;
509 } elsif ( $type eq 'i ' ) {
510 push(@para, [-4, $text]);
511 } else {
512 die "Unexpected paragraph chunk: $chunk";
516 return @para;
519 $npara = scalar(@paras);
520 for ( $i = 0 ; $i < $npara ; $i++ ) {
521 $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
525 # This converts a rendering array to a simple string
527 sub ps_arraytostr(@) {
528 my $s = '';
529 my $c;
530 foreach $c ( @_ ) {
531 $s .= $$c[1] if ( $$c[0] >= 0 );
533 return $s;
537 # This generates a duplicate of a paragraph
539 sub ps_dup_para(@) {
540 my(@i) = @_;
541 my(@o) = ();
542 my($c);
544 foreach $c ( @i ) {
545 my @cc = @{$c};
546 push(@o, [@cc]);
548 return @o;
552 # This generates a duplicate of a paragraph, stripping anchor
553 # tags (-4 and -5)
555 sub ps_dup_para_noanchor(@) {
556 my(@i) = @_;
557 my(@o) = ();
558 my($c);
560 foreach $c ( @i ) {
561 my @cc = @{$c};
562 push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
564 return @o;
568 # Scan for header paragraphs and fix up their contents;
569 # also generate table of contents and PDF bookmarks.
571 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
572 @tocptypes = ('chap');
573 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
574 %bookref = ();
575 for ( $i = 0 ; $i < $npara ; $i++ ) {
576 my $xtype = $ptypes[$i];
577 my $ptype = substr($xtype,0,4);
578 my $str;
579 my $book;
581 if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
582 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
583 die "Bad para";
585 my $secn = $1;
586 my $sech = $2;
587 my $xref = ps_xref($sech);
588 my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
590 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
591 push(@bookmarks, $book);
592 $bookref{$secn} = $book;
594 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
595 push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
597 unshift(@{$paras[$i]},
598 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
599 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
600 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
601 die "Bad para";
603 my $secn = $1;
604 my $sech = $2;
605 my $xref = ps_xref($sech);
606 my $pref;
607 $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
609 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
610 push(@bookmarks, $book);
611 $bookref{$secn} = $book;
612 $bookref{$pref}->[1]--; # Adjust count for parent node
614 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
615 push(@tocptypes,
616 (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
618 unshift(@{$paras[$i]}, [-5, $xref]);
623 # Add TOC to beginning of paragraph list
625 unshift(@paras, @tocparas); undef @tocparas;
626 unshift(@ptypes, @tocptypes); undef @tocptypes;
629 # Add copyright notice to the beginning
631 @copyright_page =
632 ([[0, $charcode{'copyright'}],
633 [0, ' '], [0, $metadata{'year'}],
634 [0, ' '], string2array($metadata{'author'}),
635 [0, ' '], string2array($metadata{'copyright_tail'})],
636 [string2array($metadata{'license'})],
637 [string2array($metadata{'auxinfo'})]);
639 unshift(@paras, @copyright_page);
640 unshift(@ptypes, ('norm') x scalar(@copyright_page));
642 $npara = scalar(@paras);
645 # No lines generated, yet.
647 @pslines = ();
650 # Line Auxilliary Information Types
652 $AuxStr = 1; # String
653 $AuxPage = 2; # Page number (from xref)
654 $AuxPageStr = 3; # Page number as a PostScript string
655 $AuxXRef = 4; # Cross reference as a name
656 $AuxNum = 5; # Number
659 # Break or convert paragraphs into lines, and push them
660 # onto the @pslines array.
662 sub ps_break_lines($$) {
663 my ($paras,$ptypes) = @_;
665 my $linewidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
666 my $bullwidth = $linewidth-$psconf{bulladj};
667 my $indxwidth = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
668 -$psconf{idxspace};
670 my $npara = scalar(@{$paras});
671 my $i;
673 for ( $i = 0 ; $i < $npara ; $i++ ) {
674 my $xtype = $ptypes->[$i];
675 my $ptype = substr($xtype,0,4);
676 my @data = @{$paras->[$i]};
677 my @ls = ();
678 if ( $ptype eq 'code' ) {
679 my $p;
680 # Code paragraph; each chunk is a line
681 foreach $p ( @data ) {
682 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
684 $ls[0]->[0]->[1] |= 1; # First in para
685 $ls[-1]->[0]->[1] |= 2; # Last in para
686 } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
687 # Chapters are flowed normally, but in an unusual font
688 @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
689 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
690 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
691 die "Bad para";
693 my $secn = $1;
694 my $sech = $2;
695 my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
696 @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
697 # We need the heading number as auxillary data
698 $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
699 } elsif ( $ptype eq 'norm' ) {
700 @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
701 } elsif ( $ptype eq 'bull' ) {
702 @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
703 } elsif ( $ptype =~ /^toc/ ) {
704 unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
705 die "Bad para";
707 my $xref = $1;
708 my $refname = $2.' ';
709 my $ntoc = substr($ptype,3,1)+0;
710 my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
711 \@NASMEncoding) *
712 ($BodyFont{fonts}->[0][0]/1000);
714 @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
715 $psconf{tocpnz}-$refwidth,
716 \%BodyFont, $ptype, @data);
718 # Auxilliary data: for the first line, the cross reference symbol
719 # and the reference name; for all lines but the first, the
720 # reference width; and for the last line, the page number
721 # as a string.
722 my $nl = scalar(@ls);
723 $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
724 for ( $j = 1 ; $j < $nl ; $j++ ) {
725 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
727 push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
728 } elsif ( $ptype =~ /^idx/ ) {
729 my $lvl = substr($ptype,3,1)+0;
731 @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
732 \%BodyFont, $ptype, @data);
733 } else {
734 die "Unknown para type: $ptype";
736 # Merge adjacent identical chunks
737 foreach $l ( @ls ) {
738 @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
740 push(@pslines,@ls);
744 # Break the main body text into lines.
745 ps_break_lines(\@paras, \@ptypes);
748 # Break lines in to pages
751 # Where to start on page 2, the copyright page
752 $curpage = 2; # Start on page 2
753 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
754 $psconf{startcopyright};
755 undef $columnstart; # Not outputting columnar text
756 undef $curcolumn; # Current column
757 $nlines = scalar(@pslines);
760 # This formats lines inside the global @pslines array into pages,
761 # updating the page and y-coordinate entries. Start at the
762 # $startline position in @pslines and go to but not including
763 # $endline. The global variables $curpage, $curypos, $columnstart
764 # and $curcolumn are updated appropriately.
766 sub ps_break_pages($$) {
767 my($startline, $endline) = @_;
769 # Paragraph types which should never be broken
770 my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
771 # Paragraph types which are heading (meaning they should not be broken
772 # immediately after)
773 my $nobreakafter = "^(chap|appn|head|subh)\$";
774 # Paragraph types which should never be broken *before*
775 my $nobreakbefore = "^idx[1-9]\$";
776 # Paragraph types which are set in columnar format
777 my $columnregexp = "^idx.\$";
779 my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
781 my $i;
783 for ( $i = $startline ; $i < $endline ; $i++ ) {
784 my $linfo = $pslines[$i]->[0];
785 if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
786 && ($$linfo[1] & 1) ) {
787 # First line of a new chapter heading. Start a new page.
788 undef $columnstart;
789 $curpage++ if ( $curypos > 0 || defined($columnstart) );
790 $curypos = $chapstart;
791 } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
792 undef $columnstart;
793 $curpage++;
794 $curypos = 0;
797 if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
798 $columnstart = $curypos;
799 $curcolumn = 0;
802 # Adjust position by the appropriate leading
803 $curypos += $$linfo[3]->{leading};
805 # Record the page and y-position
806 $$linfo[4] = $curpage;
807 $$linfo[5] = $curypos;
808 $$linfo[6] = $curcolumn if ( defined($columnstart) );
810 if ( $curypos > $upageheight ) {
811 # We need to break the page before this line.
812 my $broken = 0; # No place found yet
813 while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
814 my $linfo = $pslines[$i]->[0];
815 my $pinfo = $pslines[$i-1]->[0];
817 if ( $$linfo[1] == 2 ) {
818 # This would be an orphan, don't break.
819 } elsif ( $$linfo[1] & 1 ) {
820 # Sole line or start of paragraph. Break unless
821 # the previous line was part of a heading.
822 $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
823 $$linfo[0] !~ /$nobreakbefore/o );
824 } else {
825 # Middle of paragraph. Break unless we're in a
826 # no-break paragraph, or the previous line would
827 # end up being a widow.
828 $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
829 $$pinfo[1] != 1 );
831 $i--;
833 die "Nowhere to break page $curpage\n" if ( !$broken );
834 # Now $i should point to line immediately before the break, i.e.
835 # the next paragraph should be the first on the new page
836 if ( defined($columnstart) &&
837 ++$curcolumn < $psconf{idxcolumns} ) {
838 # We're actually breaking text into columns, not pages
839 $curypos = $columnstart;
840 } else {
841 undef $columnstart;
842 $curpage++;
843 $curypos = 0;
845 next;
848 # Add end of paragraph skip
849 if ( $$linfo[1] & 2 ) {
850 $curypos += $skiparray{$$linfo[0]};
855 ps_break_pages(0,$nlines); # Break the main text body into pages
858 # Find the page number of all the indices
860 %ps_xref_page = (); # Crossref anchor pages
861 %ps_index_pages = (); # Index item pages
862 $nlines = scalar(@pslines);
863 for ( $i = 0 ; $i < $nlines ; $i++ ) {
864 my $linfo = $pslines[$i]->[0];
865 foreach my $c ( @{$pslines[$i]->[1]} ) {
866 if ( $$c[0] == -4 ) {
867 if ( !defined($ps_index_pages{$$c[1]}) ) {
868 $ps_index_pages{$$c[1]} = [];
869 } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
870 # Pages are emitted in order; if this is a duplicated
871 # entry it will be the last one
872 next; # Duplicate
874 push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
875 } elsif ( $$c[0] == -5 ) {
876 $ps_xref_page{$$c[1]} = $$linfo[4];
882 # Emit index paragraphs
884 $startofindex = scalar(@pslines);
885 @ixparas = ([[-5,'index'],[0,'Index']]);
886 @ixptypes = ('chap');
888 foreach $k ( @ixentries ) {
889 my $n,$i;
890 my $ixptype = 'idx0';
891 my $prefix = $ixhasprefix{$k};
892 my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
893 my $commapos = undef;
895 if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
896 # This entry has a "hanging comma"
897 for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
898 if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
899 $ixpara[$i+1]->[1] eq ' ' ) {
900 $commapos = $i;
901 last;
905 if ( defined($commapos) ) {
906 if ( $ixcommafirst{$k} ) {
907 # This is the first entry; generate the
908 # "hanging comma" entry
909 my @precomma = splice(@ixpara,0,$commapos);
910 if ( $ixpara[0]->[1] eq ',' ) {
911 shift(@ixpara); # Discard lone comma
912 } else {
913 # Discard attached comma
914 $ixpara[0]->[1] =~ s/\,$//;
915 push(@precomma,shift(@ixpara));
917 push(@precomma, [-6,undef]);
918 push(@ixparas, [@precomma]);
919 push(@ixptypes, $ixptype);
920 shift(@ixpara); # Remove space
921 } else {
922 splice(@ixpara,0,$commapos+2);
924 $ixptype = 'idx1';
927 push(@ixpara, [-6,undef]); # Left/right marker
928 $i = 1; $n = scalar(@{$ps_index_pages{$k}});
929 foreach $p ( @{$ps_index_pages{$k}} ) {
930 if ( $i++ == $n ) {
931 push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
932 } else {
933 push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
937 push(@ixparas, [@ixpara]);
938 push(@ixptypes, $ixptype);
942 # Flow index paragraphs into lines
944 ps_break_lines(\@ixparas, \@ixptypes);
947 # Format index into pages
949 $nlines = scalar(@pslines);
950 ps_break_pages($startofindex, $nlines);
953 # Push index onto bookmark list
955 push(@bookmarks, ['index', 0, 'Index']);
957 # Get the list of fonts used
958 %ps_all_fonts = ();
959 foreach $fset ( @AllFonts ) {
960 foreach $font ( @{$fset->{fonts}} ) {
961 $ps_all_fonts{$font->[1]->{name}}++;
965 # Emit the PostScript DSC header
966 print "%!PS-Adobe-3.0\n";
967 print "%%Pages: $curpage\n";
968 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
969 print "%%Creator: (NASM psflow.pl)\n";
970 print "%%DocumentData: Clean7Bit\n";
971 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
972 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
973 print "%%Orientation: Portrait\n";
974 print "%%PageOrder: Ascend\n";
975 print "%%EndComments\n";
976 print "%%BeginProlog\n";
978 # Emit the configurables as PostScript tokens
979 foreach $c ( keys(%psconf) ) {
980 print "/$c ", $psconf{$c}, " def\n";
982 foreach $c ( keys(%psbool) ) {
983 print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
986 # Emit custom encoding vector
987 $zstr = '/NASMEncoding [ ';
988 foreach $c ( @NASMEncoding ) {
989 my $z = '/'.(defined($c)?$c:'.notdef ').' ';
990 if ( length($zstr)+length($z) > 72 ) {
991 print $zstr,"\n";
992 $zstr = ' ';
994 $zstr .= $z;
996 print $zstr, "] def\n";
998 # Font recoding routine
999 # newname fontname --
1000 print "/nasmenc {\n";
1001 print " findfont dup length dict begin\n";
1002 print " { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
1003 print " /Encoding NASMEncoding def\n";
1004 print " currentdict\n";
1005 print " end\n";
1006 print " definefont pop\n";
1007 print "} def\n";
1009 # Emit fontset definitions
1010 foreach $font ( keys(%ps_all_fonts) ) {
1011 print '/',$font,'-NASM /',$font," nasmenc\n";
1014 foreach $fset ( @AllFonts ) {
1015 my $i = 0;
1016 my @zfonts = ();
1017 foreach $font ( @{$fset->{fonts}} ) {
1018 print '/', $fset->{name}, $i, ' ',
1019 '/', $font->[1]->{name}, '-NASM findfont ',
1020 $font->[0], " scalefont def\n";
1021 push(@zfonts, $fset->{name}.$i);
1022 $i++;
1024 print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
1027 # This is used by the bullet-paragraph PostScript methods
1028 print "/bullet [",ps_string($charcode{'bullet'}),"] def\n";
1030 # Emit the canned PostScript prologue
1031 open(PSHEAD, "< head.ps");
1032 while ( defined($line = <PSHEAD>) ) {
1033 print $line;
1035 close(PSHEAD);
1036 print "%%EndProlog\n";
1038 # Generate a PostScript string
1039 sub ps_string($) {
1040 my ($s) = @_;
1041 my ($i,$c);
1042 my ($o) = '(';
1043 my ($l) = length($s);
1044 for ( $i = 0 ; $i < $l ; $i++ ) {
1045 $c = substr($s,$i,1);
1046 if ( ord($c) < 32 || ord($c) > 126 ) {
1047 $o .= sprintf("\\%03o", ord($c));
1048 } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1049 $o .= "\\".$c;
1050 } else {
1051 $o .= $c;
1054 return $o.')';
1057 # Generate PDF bookmarks
1058 print "%%BeginSetup\n";
1059 foreach $b ( @bookmarks ) {
1060 print '[/Title ', ps_string($b->[2]), "\n";
1061 print '/Count ', $b->[1], ' ' if ( $b->[1] );
1062 print '/Dest /',$b->[0]," /OUT pdfmark\n";
1065 # Ask the PostScript interpreter for the proper size media
1066 print "setpagesize\n";
1067 print "%%EndSetup\n";
1069 # Start a PostScript page
1070 sub ps_start_page() {
1071 $ps_page++;
1072 print "%%Page: $ps_page $ps_page\n";
1073 print "%%BeginPageSetup\n";
1074 print "save\n";
1075 print "%%EndPageSetup\n";
1076 print '/', $ps_page, " pa\n";
1079 # End a PostScript page
1080 sub ps_end_page($) {
1081 my($pn) = @_;
1082 if ( $pn ) {
1083 print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1085 print "restore showpage\n";
1088 $ps_page = 0;
1090 # Title page
1091 ps_start_page();
1092 $title = $metadata{'title'} || '';
1093 $title =~ s/ \- / $charcode{'emdash'} /;
1095 $subtitle = $metadata{'subtitle'} || '';
1096 $subtitle =~ s/ \- / $charcode{'emdash'} /;
1098 # Print title
1099 print "/ti ", ps_string($title), " def\n";
1100 print "/sti ", ps_string($subtitle), " def\n";
1101 print "lmarg pageheight 2 mul 3 div moveto\n";
1102 print "tfont0 setfont\n";
1103 print "/title linkdest ti show\n";
1104 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1105 print "0 setlinecap 3 setlinewidth\n";
1106 print "pagewidth lmarg sub rmarg sub 0 rlineto currentpoint stroke moveto\n";
1107 print "hfont1 setfont sti stringwidth pop neg ",
1108 -$HeadFont{leading}, " rmoveto\n";
1109 print "sti show\n";
1111 # Print logo, if there is one
1112 # FIX: To be 100% correct, this should look for DocumentNeeded*
1113 # and DocumentFonts in the header of the EPSF and add those to the
1114 # global header.
1115 if ( defined($metadata{epslogo}) &&
1116 sysopen(EPS, $metadata{epslogo}, O_RDONLY) ) {
1117 my @eps = ();
1118 my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1119 my $line;
1120 my $scale = 1;
1121 my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1122 my $maxheight = $psconf{pageheight}/3-40;
1123 my $width, $height;
1124 my $x, $y;
1126 while ( defined($line = <EPS>) ) {
1127 last if ( $line =~ /^%%EOF/ );
1128 if ( !defined($bbllx) &&
1129 $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1130 $bbllx = $1+0; $bblly = $2+0;
1131 $bburx = $3+0; $bbury = $4+0;
1133 push(@eps,$line);
1135 close(EPS);
1137 if ( defined($bbllx) ) {
1138 $width = $bburx-$bbllx;
1139 $height = $bbury-$bblly;
1141 if ( $width > $maxwidth ) {
1142 $scale = $maxwidth/$width;
1144 if ( $height*$scale > $maxheight ) {
1145 $scale = $maxheight/$height;
1148 $x = ($psconf{pagewidth}-$width*$scale)/2;
1149 $y = ($psconf{pageheight}-$height*$scale)/2;
1151 if ( defined($metadata{logoxadj}) ) {
1152 $x += $metadata{logoxadj};
1154 if ( defined($metadata{logoyadj}) ) {
1155 $y += $metadata{logoyadj};
1158 print "BeginEPSF\n";
1159 print $x, ' ', $y, " translate\n";
1160 print $scale, " dup scale\n" unless ( $scale == 1 );
1161 print -$bbllx, ' ', -$bblly, " translate\n";
1162 print "$bbllx $bblly moveto\n";
1163 print "$bburx $bblly lineto\n";
1164 print "$bburx $bbury lineto\n";
1165 print "$bbllx $bbury lineto\n";
1166 print "$bbllx $bblly lineto clip newpath\n";
1167 print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1168 print @eps;
1169 print "%%EndDocument\n";
1170 print "EndEPSF\n";
1173 ps_end_page(0);
1175 # Emit the rest of the document (page 2 and on)
1176 $curpage = 2;
1177 ps_start_page();
1178 foreach $line ( @pslines ) {
1179 my $linfo = $line->[0];
1181 if ( $$linfo[4] != $curpage ) {
1182 ps_end_page($curpage > 2);
1183 ps_start_page();
1184 $curpage = $$linfo[4];
1187 print '[';
1188 my $curfont = 0;
1189 foreach my $c ( @{$line->[1]} ) {
1190 if ( $$c[0] >= 0 ) {
1191 if ( $curfont != $$c[0] ) {
1192 print ($curfont = $$c[0]);
1194 print ps_string($$c[1]);
1195 } elsif ( $$c[0] == -1 ) {
1196 print '{el}'; # End link
1197 } elsif ( $$c[0] == -2 ) {
1198 print '{/',$$c[1],' xl}'; # xref link
1199 } elsif ( $$c[0] == -3 ) {
1200 print '{',ps_string($$c[1]),'wl}'; # web link
1201 } elsif ( $$c[0] == -4 ) {
1202 # Index anchor -- ignore
1203 } elsif ( $$c[0] == -5 ) {
1204 print '{/',$$c[1],' xa}'; #xref anchor
1205 } elsif ( $$c[0] == -6 ) {
1206 print ']['; # Start a new array
1207 $curfont = 0;
1208 } elsif ( $$c[0] == -7 ) {
1209 print '{/',$$c[1],' pl}'; # page link
1210 } else {
1211 die "Unknown annotation";
1214 print ']';
1215 if ( defined($$linfo[2]) ) {
1216 foreach my $x ( @{$$linfo[2]} ) {
1217 if ( $$x[0] == $AuxStr ) {
1218 print ps_string($$x[1]);
1219 } elsif ( $$x[0] == $AuxPage ) {
1220 print $ps_xref_page{$$x[1]},' ';
1221 } elsif ( $$x[0] == $AuxPageStr ) {
1222 print ps_string($ps_xref_page{$$x[1]});
1223 } elsif ( $$x[0] == $AuxXRef ) {
1224 print '/',ps_xref($$x[1]),' ';
1225 } elsif ( $$x[0] == $AuxNum ) {
1226 print $$x[1],' ';
1227 } else {
1228 die "Unknown auxilliary data type";
1232 print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1233 print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1234 print ' ', $$linfo[0].$$linfo[1], "\n";
1237 ps_end_page(1);
1238 print "%%EOF\n";