doc: clean up the "String Manipulation in Macros" section
[nasm/perl-rewrite.git] / doc / genps.pl
blob5517d84447239c596277ba9588e7cf4660310f34
1 #!/usr/bin/perl
3 # Format the documentation as PostScript
6 use Env;
7 use lib $srcdir;
9 require 'psfonts.ph'; # The fonts we want to use
10 require 'pswidth.ph'; # PostScript string width
12 use Fcntl;
15 # PostScript configurables; these values are also available to the
16 # PostScript code itself
18 %psconf = (
19 pagewidth => 595, # Page width in PostScript points
20 pageheight => 792, # Page height in PostScript points
21 lmarg => 100, # Left margin in PostScript points
22 rmarg => 50, # Right margin in PostScript points
23 topmarg => 100, # Top margin in PostScript points
24 botmarg => 100, # Bottom margin in PostScript points
25 plmarg => 50, # Page number position relative to left margin
26 prmarg => 0, # Page number position relative to right margin
27 pymarg => 50, # Page number position relative to bot margin
28 startcopyright => 75, # How much above the bottom margin is the
29 # copyright notice stuff
30 bulladj => 12, # How much to indent a bullet paragraph
31 tocind => 12, # TOC indentation per level
32 tocpnz => 24, # Width of TOC page number only zone
33 tocdots => 8, # Spacing between TOC dots
34 idxspace => 24, # Minimum space between index title and pg#
35 idxindent => 24, # How much to indent a subindex entry
36 idxgutter => 24, # Space between index columns
37 idxcolumns => 2, # Number of index columns
40 %psbool = (
41 colorlinks => 0, # Set links in blue rather than black
44 # Known paper sizes
45 %papersizes = (
46 'a5' => [421, 595], # ISO half paper size
47 'b5' => [501, 709], # ISO small paper size
48 'a4' => [595, 842], # ISO standard paper size
49 'letter' => [612, 792], # US common paper size
50 'pa4' => [595, 792], # Compromise ("portable a4")
51 'b4' => [709,1002], # ISO intermediate paper size
52 'legal' => [612,1008], # US intermediate paper size
53 'a3' => [842,1190], # ISO double paper size
54 '11x17' => [792,1224], # US double paper size
58 # Parse the command line
60 undef $input;
61 while ( $arg = shift(@ARGV) ) {
62 if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
63 $parm = $2;
64 $true = ($1 eq '') ? 1 : 0;
65 if ( $true && defined($papersizes{$parm}) ) {
66 $psconf{pagewidth} = $papersizes{$parm}->[0];
67 $psconf{pageheight} = $papersizes{$parm}->[1];
68 } elsif ( defined($psbool{$parm}) ) {
69 $psbool{$parm} = $true;
70 } elsif ( $true && defined($psconf{$parm}) ) {
71 $psconf{$parm} = shift(@ARGV);
72 } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
73 $metadata{$parm} = shift(@ARGV);
74 } else {
75 die "$0: Unknown option: $arg\n";
77 } else {
78 $input = $arg;
83 # Document formatting parameters
85 $paraskip = 6; # Space between paragraphs
86 $chapstart = 30; # Space before a chapter heading
87 $chapskip = 24; # Space after a chapter heading
88 $tocskip = 6; # Space between TOC entries
90 # Configure post-paragraph skips for each kind of paragraph
91 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
92 'head' => $paraskip, 'subh' => $paraskip,
93 'norm' => $paraskip, 'bull' => $paraskip,
94 'code' => $paraskip, 'toc0' => $tocskip,
95 'toc1' => $tocskip, 'toc2' => $tocskip);
97 # Custom encoding vector. This is basically the same as
98 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
99 # but with the "naked" accents at \200-\237 moved to the \000-\037
100 # range (ASCII control characters), and a few extra characters thrown
101 # in. It is basically a modified Windows 1252 codepage, minus, for
102 # now, the euro sign (\200 is reserved for euro.)
104 @NASMEncoding =
106 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
107 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
108 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
109 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
110 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
111 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
112 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
113 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
114 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
115 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
116 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
117 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
118 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
119 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
120 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
121 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
122 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
123 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
124 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
125 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
126 'scaron', 'guilsignlright', 'oe', undef, 'zcaron', 'Ydieresis',
127 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
128 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
129 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
130 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
131 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
132 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
133 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
134 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
135 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
136 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
137 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
138 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
139 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
140 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
141 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
142 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
143 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
144 'thorn', 'ydieresis'
147 # Name-to-byte lookup hash
148 %charcode = ();
149 for ( $i = 0 ; $i < 256 ; $i++ ) {
150 $charcode{$NASMEncoding[$i]} = chr($i);
154 # First, format the stuff coming from the front end into
155 # a cleaner representation
157 if ( defined($input) ) {
158 sysopen(PARAS, $input, O_RDONLY) or
159 die "$0: cannot open $input: $!\n";
160 } else {
161 open(PARAS, "<&STDIN") or die "$0: $!\n";
163 while ( defined($line = <PARAS>) ) {
164 chomp $line;
165 $data = <PARAS>;
166 chomp $data;
167 if ( $line =~ /^meta :(.*)$/ ) {
168 $metakey = $1;
169 $metadata{$metakey} = $data;
170 } elsif ( $line =~ /^indx :(.*)$/ ) {
171 $ixentry = $1;
172 push(@ixentries, $ixentry);
173 $ixterms{$ixentry} = [split(/\037/, $data)];
174 # Look for commas. This is easier done on the string
175 # representation, so do it now.
176 if ( $data =~ /^(.*)\,\037sp\037/ ) {
177 $ixprefix = $1;
178 $ixprefix =~ s/\037n $//; # Discard possible font change at end
179 $ixhasprefix{$ixentry} = $ixprefix;
180 if ( !$ixprefixes{$ixprefix} ) {
181 $ixcommafirst{$ixentry}++;
183 $ixprefixes{$ixprefix}++;
184 } else {
185 # A complete term can also be used as a prefix
186 $ixprefixes{$data}++;
188 } else {
189 push(@ptypes, $line);
190 push(@paras, [split(/\037/, $data)]);
193 close(PARAS);
196 # Convert an integer to a chosen base
198 sub int2base($$) {
199 my($i,$b) = @_;
200 my($s) = '';
201 my($n) = '';
202 my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
203 return '0' if ($i == 0);
204 if ( $i < 0 ) { $n = '-'; $i = -$i; }
205 while ( $i ) {
206 $s = substr($z,$i%$b,1) . $s;
207 $i = int($i/$b);
209 return $n.$s;
213 # Convert a string to a rendering array
215 sub string2array($)
217 my($s) = @_;
218 my(@a) = ();
220 $s =~ s/ \- / $charcode{'endash'} /g; # Replace " - " with en dash
222 while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
223 push(@a, [0,$1]);
224 $s = $2;
227 return @a;
231 # Take a crossreference name and generate the PostScript name for it.
233 # This hack produces a somewhat smaller PDF...
234 #%ps_xref_list = ();
235 #$ps_xref_next = 0;
236 #sub ps_xref($) {
237 # my($s) = @_;
238 # my $q = $ps_xref_list{$s};
239 # return $q if ( defined($ps_xref_list{$s}) );
240 # $q = 'X'.int2base($ps_xref_next++, 52);
241 # $ps_xref_list{$s} = $q;
242 # return $q;
245 # Somewhat bigger PDF, but one which obeys # URLs
246 sub ps_xref($) {
247 return @_[0];
251 # Flow lines according to a particular font set and width
253 # A "font set" is represented as an array containing
254 # arrays of pairs: [<size>, <metricref>]
256 # Each line is represented as:
257 # [ [type,first|last,aux,fontset,page,ypos,optional col],
258 # [rendering array] ]
260 # A space character may be "squeezed" by up to this much
261 # (as a fraction of the normal width of a space.)
263 $ps_space_squeeze = 0.00; # Min space width 100%
264 sub ps_flow_lines($$$@) {
265 my($wid, $fontset, $type, @data) = @_;
266 my($fonts) = $$fontset{fonts};
267 my($e);
268 my($w) = 0; # Width of current line
269 my($sw) = 0; # Width of current line due to spaces
270 my(@l) = (); # Current line
271 my(@ls) = (); # Accumulated output lines
272 my(@xd) = (); # Metadata that goes with subsequent text
273 my $hasmarker = 0; # Line has -6 marker
274 my $pastmarker = 0; # -6 marker found
276 # If there is a -6 marker anywhere in the paragraph,
277 # *each line* output needs to have a -6 marker
278 foreach $e ( @data ) {
279 $hasmarker = 1 if ( $$e[0] == -6 );
282 $w = 0;
283 foreach $e ( @data ) {
284 if ( $$e[0] < 0 ) {
285 # Type is metadata. Zero width.
286 if ( $$e[0] == -6 ) {
287 $pastmarker = 1;
289 if ( $$e[0] == -1 || $$e[0] == -6 ) {
290 # -1 (end anchor) or -6 (marker) goes with the preceeding
291 # text, otherwise with the subsequent text
292 push(@l, $e);
293 } else {
294 push(@xd, $e);
296 } else {
297 my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
298 \@NASMEncoding) *
299 ($fontset->{fonts}->[$$e[0]][0]/1000);
300 my $sp = $$e[1];
301 $sp =~ tr/[^ ]//d; # Delete nonspaces
302 my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
303 \@NASMEncoding) *
304 ($fontset->{fonts}->[$$e[0]][0]/1000);
306 if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
307 # Begin new line
308 # Search backwards for previous space chunk
309 my $lx = scalar(@l)-1;
310 my @rm = ();
311 while ( $lx >= 0 ) {
312 while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
313 # Skip metadata
314 $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
315 $lx--;
317 if ( $lx >= 0 ) {
318 if ( $l[$lx]->[1] eq ' ' ) {
319 splice(@l, $lx, 1);
320 @rm = splice(@l, $lx);
321 last; # Found place to break
322 } else {
323 $lx--;
328 # Now @l contains the stuff to remain on the old line
329 # If we broke the line inside a link, then split the link
330 # into two.
331 my $lkref = undef;
332 foreach my $lc ( @l ) {
333 if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
334 $lkref = $lc;
335 } elsif ( $$lc[0] == -1 ) {
336 undef $lkref;
340 if ( defined($lkref) ) {
341 push(@l, [-1,undef]); # Terminate old reference
342 unshift(@rm, $lkref); # Duplicate reference on new line
345 if ( $hasmarker ) {
346 if ( $pastmarker ) {
347 unshift(@rm,[-6,undef]); # New line starts with marker
348 } else {
349 push(@l,[-6,undef]); # Old line ends with marker
353 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
354 @l = @rm;
356 $w = $sw = 0;
357 # Compute the width of the remainder array
358 for my $le ( @l ) {
359 if ( $$le[0] >= 0 ) {
360 my $xew = ps_width($$le[1],
361 $fontset->{fonts}->[$$le[0]][1],
362 \@NASMEncoding) *
363 ($fontset->{fonts}->[$$le[0]][0]/1000);
364 my $xsp = $$le[1];
365 $xsp =~ tr/[^ ]//d; # Delete nonspaces
366 my $xsw = ps_width($xsp,
367 $fontset->{fonts}->[$$le[0]][1],
368 \@NASMEncoding) *
369 ($fontset->{fonts}->[$$le[0]][0]/1000);
370 $w += $xew; $sw += $xsw;
374 push(@l, @xd); # Accumulated metadata
375 @xd = ();
376 if ( $$e[1] ne '' ) {
377 push(@l, $e);
378 $w += $ew; $sw += $esw;
382 push(@l,@xd);
383 if ( scalar(@l) ) {
384 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
387 # Mark the first line as first and the last line as last
388 if ( scalar(@ls) ) {
389 $ls[0]->[0]->[1] |= 1; # First in para
390 $ls[-1]->[0]->[1] |= 2; # Last in para
392 return @ls;
396 # Once we have broken things into lines, having multiple chunks
397 # with the same font index is no longer meaningful. Merge
398 # adjacent chunks to keep down the size of the whole file.
400 sub ps_merge_chunks(@) {
401 my(@ci) = @_;
402 my($c, $lc);
403 my(@co, $eco);
405 undef $lc;
406 @co = ();
407 $eco = -1; # Index of the last entry in @co
408 foreach $c ( @ci ) {
409 if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
410 $co[$eco]->[1] .= $$c[1];
411 } else {
412 push(@co, $c); $eco++;
413 $lc = $$c[0];
416 return @co;
420 # Convert paragraphs to rendering arrays. Each
421 # element in the array contains (font, string),
422 # where font can be one of:
423 # -1 end link
424 # -2 begin crossref
425 # -3 begin weblink
426 # -4 index item anchor
427 # -5 crossref anchor
428 # -6 left/right marker (used in the index)
429 # -7 page link (used in the index)
430 # 0 normal
431 # 1 empatic (italic)
432 # 2 code (fixed spacing)
435 sub mkparaarray($@) {
436 my($ptype, @chunks) = @_;
438 my @para = ();
439 my $in_e = 0;
440 my $chunk;
442 if ( $ptype =~ /^code/ ) {
443 foreach $chunk ( @chunks ) {
444 push(@para, [2, $chunk]);
446 } else {
447 foreach $chunk ( @chunks ) {
448 my $type = substr($chunk,0,2);
449 my $text = substr($chunk,2);
451 if ( $type eq 'sp' ) {
452 push(@para, [$in_e?1:0, ' ']);
453 } elsif ( $type eq 'da' ) {
454 push(@para, [$in_e?1:0, $charcode{'endash'}]);
455 } elsif ( $type eq 'n ' ) {
456 push(@para, [0, $text]);
457 $in_e = 0;
458 } elsif ( $type =~ '^e' ) {
459 push(@para, [1, $text]);
460 $in_e = ($type eq 'es' || $type eq 'e ');
461 } elsif ( $type eq 'c ' ) {
462 push(@para, [2, $text]);
463 $in_e = 0;
464 } elsif ( $type eq 'x ' ) {
465 push(@para, [-2, ps_xref($text)]);
466 } elsif ( $type eq 'xe' ) {
467 push(@para, [-1, undef]);
468 } elsif ( $type eq 'wc' || $type eq 'w ' ) {
469 $text =~ /\<(.*)\>(.*)$/;
470 my $link = $1; $text = $2;
471 push(@para, [-3, $link]);
472 push(@para, [($type eq 'wc') ? 2:0, $text]);
473 push(@para, [-1, undef]);
474 $in_e = 0;
475 } elsif ( $type eq 'i ' ) {
476 push(@para, [-4, $text]);
477 } else {
478 die "Unexpected paragraph chunk: $chunk";
482 return @para;
485 $npara = scalar(@paras);
486 for ( $i = 0 ; $i < $npara ; $i++ ) {
487 $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
491 # This converts a rendering array to a simple string
493 sub ps_arraytostr(@) {
494 my $s = '';
495 my $c;
496 foreach $c ( @_ ) {
497 $s .= $$c[1] if ( $$c[0] >= 0 );
499 return $s;
503 # This generates a duplicate of a paragraph
505 sub ps_dup_para(@) {
506 my(@i) = @_;
507 my(@o) = ();
508 my($c);
510 foreach $c ( @i ) {
511 my @cc = @{$c};
512 push(@o, [@cc]);
514 return @o;
518 # This generates a duplicate of a paragraph, stripping anchor
519 # tags (-4 and -5)
521 sub ps_dup_para_noanchor(@) {
522 my(@i) = @_;
523 my(@o) = ();
524 my($c);
526 foreach $c ( @i ) {
527 my @cc = @{$c};
528 push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
530 return @o;
534 # Scan for header paragraphs and fix up their contents;
535 # also generate table of contents and PDF bookmarks.
537 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
538 @tocptypes = ('chap');
539 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
540 %bookref = ();
541 for ( $i = 0 ; $i < $npara ; $i++ ) {
542 my $xtype = $ptypes[$i];
543 my $ptype = substr($xtype,0,4);
544 my $str;
545 my $book;
547 if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
548 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
549 die "Bad para";
551 my $secn = $1;
552 my $sech = $2;
553 my $xref = ps_xref($sech);
554 my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
556 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
557 push(@bookmarks, $book);
558 $bookref{$secn} = $book;
560 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
561 push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
563 unshift(@{$paras[$i]},
564 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
565 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
566 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
567 die "Bad para";
569 my $secn = $1;
570 my $sech = $2;
571 my $xref = ps_xref($sech);
572 my $pref;
573 $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
575 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
576 push(@bookmarks, $book);
577 $bookref{$secn} = $book;
578 $bookref{$pref}->[1]--; # Adjust count for parent node
580 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
581 push(@tocptypes,
582 (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
584 unshift(@{$paras[$i]}, [-5, $xref]);
589 # Add TOC to beginning of paragraph list
591 unshift(@paras, @tocparas); undef @tocparas;
592 unshift(@ptypes, @tocptypes); undef @tocptypes;
595 # Add copyright notice to the beginning
597 unshift(@paras,
598 [[0, $charcode{'copyright'}], [0, ' '], [0,$metadata{'year'}],
599 [0, ' '], string2array($metadata{'author'})],
600 [string2array($metadata{'license'})]);
601 unshift(@ptypes, 'norm', 'norm');
603 $npara = scalar(@paras);
606 # No lines generated, yet.
608 @pslines = ();
611 # Line Auxilliary Information Types
613 $AuxStr = 1; # String
614 $AuxPage = 2; # Page number (from xref)
615 $AuxPageStr = 3; # Page number as a PostScript string
616 $AuxXRef = 4; # Cross reference as a name
617 $AuxNum = 5; # Number
620 # Break or convert paragraphs into lines, and push them
621 # onto the @pslines array.
623 sub ps_break_lines($$) {
624 my ($paras,$ptypes) = @_;
626 my $linewidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
627 my $bullwidth = $linewidth-$psconf{bulladj};
628 my $indxwidth = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
629 -$psconf{idxspace};
631 my $npara = scalar(@{$paras});
632 my $i;
634 for ( $i = 0 ; $i < $npara ; $i++ ) {
635 my $xtype = $ptypes->[$i];
636 my $ptype = substr($xtype,0,4);
637 my @data = @{$paras->[$i]};
638 my @ls = ();
639 if ( $ptype eq 'code' ) {
640 my $p;
641 # Code paragraph; each chunk is a line
642 foreach $p ( @data ) {
643 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
645 $ls[0]->[0]->[1] |= 1; # First in para
646 $ls[-1]->[0]->[1] |= 2; # Last in para
647 } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
648 # Chapters are flowed normally, but in an unusual font
649 @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
650 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
651 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
652 die "Bad para";
654 my $secn = $1;
655 my $sech = $2;
656 my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
657 @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
658 # We need the heading number as auxillary data
659 $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
660 } elsif ( $ptype eq 'norm' ) {
661 @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
662 } elsif ( $ptype eq 'bull' ) {
663 @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
664 } elsif ( $ptype =~ /^toc/ ) {
665 unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
666 die "Bad para";
668 my $xref = $1;
669 my $refname = $2.' ';
670 my $ntoc = substr($ptype,3,1)+0;
671 my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
672 \@NASMEncoding) *
673 ($BodyFont{fonts}->[0][0]/1000);
675 @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
676 $psconf{tocpnz}-$refwidth,
677 \%BodyFont, $ptype, @data);
679 # Auxilliary data: for the first line, the cross reference symbol
680 # and the reference name; for all lines but the first, the
681 # reference width; and for the last line, the page number
682 # as a string.
683 my $nl = scalar(@ls);
684 $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
685 for ( $j = 1 ; $j < $nl ; $j++ ) {
686 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
688 push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
689 } elsif ( $ptype =~ /^idx/ ) {
690 my $lvl = substr($ptype,3,1)+0;
692 @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
693 \%BodyFont, $ptype, @data);
694 } else {
695 die "Unknown para type: $ptype";
697 # Merge adjacent identical chunks
698 foreach $l ( @ls ) {
699 @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
701 push(@pslines,@ls);
705 # Break the main body text into lines.
706 ps_break_lines(\@paras, \@ptypes);
709 # Break lines in to pages
712 # Where to start on page 2, the copyright page
713 $curpage = 2; # Start on page 2
714 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
715 $psconf{startcopyright};
716 undef $columnstart; # Not outputting columnar text
717 undef $curcolumn; # Current column
718 $nlines = scalar(@pslines);
721 # This formats lines inside the global @pslines array into pages,
722 # updating the page and y-coordinate entries. Start at the
723 # $startline position in @pslines and go to but not including
724 # $endline. The global variables $curpage, $curypos, $columnstart
725 # and $curcolumn are updated appropriately.
727 sub ps_break_pages($$) {
728 my($startline, $endline) = @_;
730 # Paragraph types which should never be broken
731 my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
732 # Paragraph types which are heading (meaning they should not be broken
733 # immediately after)
734 my $nobreakafter = "^(chap|appn|head|subh)\$";
735 # Paragraph types which should never be broken *before*
736 my $nobreakbefore = "^idx[1-9]\$";
737 # Paragraph types which are set in columnar format
738 my $columnregexp = "^idx.\$";
740 my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
742 my $i;
744 for ( $i = $startline ; $i < $endline ; $i++ ) {
745 my $linfo = $pslines[$i]->[0];
746 if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
747 && ($$linfo[1] & 1) ) {
748 # First line of a new chapter heading. Start a new page.
749 undef $columnstart;
750 $curpage++ if ( $curypos > 0 || defined($columnstart) );
751 $curypos = $chapstart;
752 } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
753 undef $columnstart;
754 $curpage++;
755 $curypos = 0;
758 if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
759 $columnstart = $curypos;
760 $curcolumn = 0;
763 # Adjust position by the appropriate leading
764 $curypos += $$linfo[3]->{leading};
766 # Record the page and y-position
767 $$linfo[4] = $curpage;
768 $$linfo[5] = $curypos;
769 $$linfo[6] = $curcolumn if ( defined($columnstart) );
771 if ( $curypos > $upageheight ) {
772 # We need to break the page before this line.
773 my $broken = 0; # No place found yet
774 while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
775 my $linfo = $pslines[$i]->[0];
776 my $pinfo = $pslines[$i-1]->[0];
778 if ( $$linfo[1] == 2 ) {
779 # This would be an orphan, don't break.
780 } elsif ( $$linfo[1] & 1 ) {
781 # Sole line or start of paragraph. Break unless
782 # the previous line was part of a heading.
783 $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
784 $$linfo[0] !~ /$nobreakbefore/o );
785 } else {
786 # Middle of paragraph. Break unless we're in a
787 # no-break paragraph, or the previous line would
788 # end up being a widow.
789 $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
790 $$pinfo[1] != 1 );
792 $i--;
794 die "Nowhere to break page $curpage\n" if ( !$broken );
795 # Now $i should point to line immediately before the break, i.e.
796 # the next paragraph should be the first on the new page
797 if ( defined($columnstart) &&
798 ++$curcolumn < $psconf{idxcolumns} ) {
799 # We're actually breaking text into columns, not pages
800 $curypos = $columnstart;
801 } else {
802 undef $columnstart;
803 $curpage++;
804 $curypos = 0;
806 next;
809 # Add end of paragraph skip
810 if ( $$linfo[1] & 2 ) {
811 $curypos += $skiparray{$$linfo[0]};
816 ps_break_pages(0,$nlines); # Break the main text body into pages
819 # Find the page number of all the indices
821 %ps_xref_page = (); # Crossref anchor pages
822 %ps_index_pages = (); # Index item pages
823 $nlines = scalar(@pslines);
824 for ( $i = 0 ; $i < $nlines ; $i++ ) {
825 my $linfo = $pslines[$i]->[0];
826 foreach my $c ( @{$pslines[$i]->[1]} ) {
827 if ( $$c[0] == -4 ) {
828 if ( !defined($ps_index_pages{$$c[1]}) ) {
829 $ps_index_pages{$$c[1]} = [];
830 } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
831 # Pages are emitted in order; if this is a duplicated
832 # entry it will be the last one
833 next; # Duplicate
835 push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
836 } elsif ( $$c[0] == -5 ) {
837 $ps_xref_page{$$c[1]} = $$linfo[4];
843 # Emit index paragraphs
845 $startofindex = scalar(@pslines);
846 @ixparas = ([[-5,'index'],[0,'Index']]);
847 @ixptypes = ('chap');
849 foreach $k ( @ixentries ) {
850 my $n,$i;
851 my $ixptype = 'idx0';
852 my $prefix = $ixhasprefix{$k};
853 my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
854 my $commapos = undef;
856 if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
857 # This entry has a "hanging comma"
858 for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
859 if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
860 $ixpara[$i+1]->[1] eq ' ' ) {
861 $commapos = $i;
862 last;
866 if ( defined($commapos) ) {
867 if ( $ixcommafirst{$k} ) {
868 # This is the first entry; generate the
869 # "hanging comma" entry
870 my @precomma = splice(@ixpara,0,$commapos);
871 if ( $ixpara[0]->[1] eq ',' ) {
872 shift(@ixpara); # Discard lone comma
873 } else {
874 # Discard attached comma
875 $ixpara[0]->[1] =~ s/\,$//;
876 push(@precomma,shift(@ixpara));
878 push(@precomma, [-6,undef]);
879 push(@ixparas, [@precomma]);
880 push(@ixptypes, $ixptype);
881 shift(@ixpara); # Remove space
882 } else {
883 splice(@ixpara,0,$commapos+2);
885 $ixptype = 'idx1';
888 push(@ixpara, [-6,undef]); # Left/right marker
889 $i = 1; $n = scalar(@{$ps_index_pages{$k}});
890 foreach $p ( @{$ps_index_pages{$k}} ) {
891 if ( $i++ == $n ) {
892 push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
893 } else {
894 push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
898 push(@ixparas, [@ixpara]);
899 push(@ixptypes, $ixptype);
903 # Flow index paragraphs into lines
905 ps_break_lines(\@ixparas, \@ixptypes);
908 # Format index into pages
910 $nlines = scalar(@pslines);
911 ps_break_pages($startofindex, $nlines);
914 # Push index onto bookmark list
916 push(@bookmarks, ['index', 0, 'Index']);
918 # Get the list of fonts used
919 %ps_all_fonts = ();
920 foreach $fset ( @AllFonts ) {
921 foreach $font ( @{$fset->{fonts}} ) {
922 $ps_all_fonts{$font->[1]->{name}}++;
926 # Emit the PostScript DSC header
927 print "%!PS-Adobe-3.0\n";
928 print "%%Pages: $curpage\n";
929 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
930 print "%%Creator: (NASM psflow.pl)\n";
931 print "%%DocumentData: Clean7Bit\n";
932 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
933 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
934 print "%%Orientation: Portrait\n";
935 print "%%PageOrder: Ascend\n";
936 print "%%EndComments\n";
937 print "%%BeginProlog\n";
939 # Emit the configurables as PostScript tokens
940 foreach $c ( keys(%psconf) ) {
941 print "/$c ", $psconf{$c}, " def\n";
943 foreach $c ( keys(%psbool) ) {
944 print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
947 # Emit custom encoding vector
948 $zstr = '/NASMEncoding [ ';
949 foreach $c ( @NASMEncoding ) {
950 my $z = '/'.(defined($c)?$c:'.notdef ').' ';
951 if ( length($zstr)+length($z) > 72 ) {
952 print $zstr,"\n";
953 $zstr = ' ';
955 $zstr .= $z;
957 print $zstr, "] def\n";
959 # Font recoding routine
960 # newname fontname --
961 print "/nasmenc {\n";
962 print " findfont dup length dict begin\n";
963 print " { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
964 print " /Encoding NASMEncoding def\n";
965 print " currentdict\n";
966 print " end\n";
967 print " definefont pop\n";
968 print "} def\n";
970 # Emit fontset definitions
971 foreach $font ( keys(%ps_all_fonts) ) {
972 print '/',$font,'-NASM /',$font," nasmenc\n";
975 foreach $fset ( @AllFonts ) {
976 my $i = 0;
977 my @zfonts = ();
978 foreach $font ( @{$fset->{fonts}} ) {
979 print '/', $fset->{name}, $i, ' ',
980 '/', $font->[1]->{name}, '-NASM findfont ',
981 $font->[0], " scalefont def\n";
982 push(@zfonts, $fset->{name}.$i);
983 $i++;
985 print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
988 # This is used by the bullet-paragraph PostScript methods
989 print "/bullet [",ps_string($charcode{'bullet'}),"] def\n";
991 # Emit the canned PostScript prologue
992 open(PSHEAD, "< head.ps");
993 while ( defined($line = <PSHEAD>) ) {
994 print $line;
996 close(PSHEAD);
997 print "%%EndProlog\n";
999 # Generate a PostScript string
1000 sub ps_string($) {
1001 my ($s) = @_;
1002 my ($i,$c);
1003 my ($o) = '(';
1004 my ($l) = length($s);
1005 for ( $i = 0 ; $i < $l ; $i++ ) {
1006 $c = substr($s,$i,1);
1007 if ( ord($c) < 32 || ord($c) > 126 ) {
1008 $o .= sprintf("\\%03o", ord($c));
1009 } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1010 $o .= "\\".$c;
1011 } else {
1012 $o .= $c;
1015 return $o.')';
1018 # Generate PDF bookmarks
1019 print "%%BeginSetup\n";
1020 foreach $b ( @bookmarks ) {
1021 print '[/Title ', ps_string($b->[2]), "\n";
1022 print '/Count ', $b->[1], ' ' if ( $b->[1] );
1023 print '/Dest /',$b->[0]," /OUT pdfmark\n";
1026 # Ask the PostScript interpreter for the proper size media
1027 print "setpagesize\n";
1028 print "%%EndSetup\n";
1030 # Start a PostScript page
1031 sub ps_start_page() {
1032 $ps_page++;
1033 print "%%Page: $ps_page $ps_page\n";
1034 print "%%BeginPageSetup\n";
1035 print "save\n";
1036 print "%%EndPageSetup\n";
1037 print '/', $ps_page, " pa\n";
1040 # End a PostScript page
1041 sub ps_end_page($) {
1042 my($pn) = @_;
1043 if ( $pn ) {
1044 print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1046 print "restore showpage\n";
1049 $ps_page = 0;
1051 # Title page
1052 ps_start_page();
1053 $title = $metadata{'title'} || '';
1054 $title =~ s/ \- / $charcode{'emdash'} /;
1056 $subtitle = $metadata{'subtitle'} || '';
1057 $subtitle =~ s/ \- / $charcode{'emdash'} /;
1059 # Print title
1060 print "/ti ", ps_string($title), " def\n";
1061 print "/sti ", ps_string($subtitle), " def\n";
1062 print "lmarg pageheight 2 mul 3 div moveto\n";
1063 print "tfont0 setfont\n";
1064 print "/title linkdest ti show\n";
1065 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1066 print "0 setlinecap 3 setlinewidth\n";
1067 print "pagewidth lmarg sub rmarg sub 0 rlineto currentpoint stroke moveto\n";
1068 print "hfont1 setfont sti stringwidth pop neg ",
1069 -$HeadFont{leading}, " rmoveto\n";
1070 print "sti show\n";
1072 # Print logo, if there is one
1073 # FIX: To be 100% correct, this should look for DocumentNeeded*
1074 # and DocumentFonts in the header of the EPSF and add those to the
1075 # global header.
1076 if ( defined($metadata{epslogo}) &&
1077 sysopen(EPS, $metadata{epslogo}, O_RDONLY) ) {
1078 my @eps = ();
1079 my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1080 my $line;
1081 my $scale = 1;
1082 my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1083 my $maxheight = $psconf{pageheight}/3-40;
1084 my $width, $height;
1085 my $x, $y;
1087 while ( defined($line = <EPS>) ) {
1088 last if ( $line =~ /^%%EOF/ );
1089 if ( !defined($bbllx) &&
1090 $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1091 $bbllx = $1+0; $bblly = $2+0;
1092 $bburx = $3+0; $bbury = $4+0;
1094 push(@eps,$line);
1096 close(EPS);
1098 if ( defined($bbllx) ) {
1099 $width = $bburx-$bbllx;
1100 $height = $bbury-$bblly;
1102 if ( $width > $maxwidth ) {
1103 $scale = $maxwidth/$width;
1105 if ( $height*$scale > $maxheight ) {
1106 $scale = $maxheight/$height;
1109 $x = ($psconf{pagewidth}-$width*$scale)/2;
1110 $y = ($psconf{pageheight}-$height*$scale)/2;
1112 print "BeginEPSF\n";
1113 print $x, ' ', $y, " translate\n";
1114 print $scale, " dup scale\n" unless ( $scale == 1 );
1115 print -$bbllx, ' ', -$bblly, " translate\n";
1116 print "$bbllx $bblly moveto\n";
1117 print "$bburx $bblly lineto\n";
1118 print "$bburx $bbury lineto\n";
1119 print "$bbllx $bbury lineto\n";
1120 print "$bbllx $bblly lineto clip newpath\n";
1121 print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1122 print @eps;
1123 print "%%EndDocument\n";
1124 print "EndEPSF\n";
1127 ps_end_page(0);
1129 # Emit the rest of the document (page 2 and on)
1130 $curpage = 2;
1131 ps_start_page();
1132 foreach $line ( @pslines ) {
1133 my $linfo = $line->[0];
1135 if ( $$linfo[4] != $curpage ) {
1136 ps_end_page($curpage > 2);
1137 ps_start_page();
1138 $curpage = $$linfo[4];
1141 print '[';
1142 my $curfont = 0;
1143 foreach my $c ( @{$line->[1]} ) {
1144 if ( $$c[0] >= 0 ) {
1145 if ( $curfont != $$c[0] ) {
1146 print ($curfont = $$c[0]);
1148 print ps_string($$c[1]);
1149 } elsif ( $$c[0] == -1 ) {
1150 print '{el}'; # End link
1151 } elsif ( $$c[0] == -2 ) {
1152 print '{/',$$c[1],' xl}'; # xref link
1153 } elsif ( $$c[0] == -3 ) {
1154 print '{',ps_string($$c[1]),'wl}'; # web link
1155 } elsif ( $$c[0] == -4 ) {
1156 # Index anchor -- ignore
1157 } elsif ( $$c[0] == -5 ) {
1158 print '{/',$$c[1],' xa}'; #xref anchor
1159 } elsif ( $$c[0] == -6 ) {
1160 print ']['; # Start a new array
1161 $curfont = 0;
1162 } elsif ( $$c[0] == -7 ) {
1163 print '{/',$$c[1],' pl}'; # page link
1164 } else {
1165 die "Unknown annotation";
1168 print ']';
1169 if ( defined($$linfo[2]) ) {
1170 foreach my $x ( @{$$linfo[2]} ) {
1171 if ( $$x[0] == $AuxStr ) {
1172 print ps_string($$x[1]);
1173 } elsif ( $$x[0] == $AuxPage ) {
1174 print $ps_xref_page{$$x[1]},' ';
1175 } elsif ( $$x[0] == $AuxPageStr ) {
1176 print ps_string($ps_xref_page{$$x[1]});
1177 } elsif ( $$x[0] == $AuxXRef ) {
1178 print '/',ps_xref($$x[1]),' ';
1179 } elsif ( $$x[0] == $AuxNum ) {
1180 print $$x[1],' ';
1181 } else {
1182 die "Unknown auxilliary data type";
1186 print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1187 print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1188 print ' ', $$linfo[0].$$linfo[1], "\n";
1191 ps_end_page(1);
1192 print "%%EOF\n";