2 # ------------------------------------------------------------------------------
4 # Function: Format PDF Output from groff Markup
6 # Copyright (C) 2005, Free Software Foundation, Inc.
7 # Written by Keith Marshall (keith.d.marshall@ntlworld.com)
9 # This file is part of groff.
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2, or (at your option) any later
# version.
# groff is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
21 # You should have received a copy of the GNU General Public License along
22 # with groff; see the file COPYING. If not, write to the Free Software
23 # Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 # ------------------------------------------------------------------------------
# Set up an identifier for the NULL device.
# In most cases "/dev/null" will be correct, but some shells on
# MS-DOS/MS-Windows systems may require us to use "NUL".
#
# The default is assigned first, and the value is quoted in the test:
# with an empty operand, an unquoted 'test -c' degenerates to a one
# argument test, which always succeeds.  The fallback is stored in
# NULLDEV itself, since that is the name the redirections below use.
#
  NULLDEV="/dev/null"
  test -c "$NULLDEV" || NULLDEV="NUL"
# Set up the command name to use in diagnostic messages.
# (We can't assume we have 'basename', so use the full path if required.
# Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
#
# "$0" is quoted, so that an invocation path containing white space
# reaches 'basename' as a single argument; if the substitution fails,
# the unmodified "$0" is used instead.
#
  CMD=`exec 2>"$NULLDEV"; basename "$0"` || CMD=$0
# We need both 'grep' and 'sed' programs, to parse script options,
# and we also need 'cat', to display help and some error messages,
# so ensure they are all installed, before we continue.
# (Again, note that we first check the status from 'type', BEFORE
# we attempt to use the result, because Cygwin's 'ash' uses 'stdout'
# instead of 'stderr', to display its 'not found' message).
#
# Each variable starts out as the ':' sentinel, meaning "not found".
# When 'type' succeeds, its report is loaded into the positional
# parameters of a subshell, and the last word of that report is
# captured as the program's location.
#
  CAT=':' GREP=':' SED=':'
  type cat >"$NULLDEV" 2>&1 && CAT=`set :\`type cat\` ; eval echo '$'$#`
  type grep >"$NULLDEV" 2>&1 && GREP=`set :\`type grep\` ; eval echo '$'$#`
  type sed >"$NULLDEV" 2>&1 && SED=`set :\`type sed\` ; eval echo '$'$#`
# Another fundamental requirement is the 'groff' program itself;
# we will first perform a PATH search to locate this; however,
# we will prefer any version existing in a specified GROFF_BIN_DIR,
# or, if unspecified, the installed location of 'groff' programs;
# (this will override the result of the initial PATH search).
#
# GROFF starts out as the ':' sentinel, so that the "missing program"
# check, which compares it with ':', can detect a failed search.
# GROFF_BIN_DIR is given its configured default here, if it is unset.
#
  GROFF=':'
  type groff >"$NULLDEV" 2>&1 && GROFF=`set :\`type groff\` ; eval echo '$'$#`
  type ${GROFF_BIN_DIR="@GROFF_BIN_DIR@"}/groff >"$NULLDEV" 2>&1 \
    && GROFF=`set :\`type $GROFF_BIN_DIR/groff\` ; eval echo '$'$#`
# If one or more of these is missing, diagnose and bail out.
#
# NO accumulates the quoted names of the programs which were not found;
# it is cleared first, so that a value inherited from the environment
# cannot trigger a false failure.
# NOTE(review): GBIN is not assigned in the visible part of this script;
# confirm where it is expected to be set.
#
  NO=''
  NOPROG="$CMD: installation problem: cannot find program"
  test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
  test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
  test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GBIN" && NO="$NO 'groff'"
  test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
#
# When anything is missing, rebuild NO as a grammatical list, in the
# form "s 'a', 'b' and 'c'" with IS="are", or " 'a'" with IS="is",
# then display the fatal error message, and exit.
#
  if test -n "$NO"
  then
    set $NO
    test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
    while test $# -gt 0
    do
      test $# -gt 2 && NO="$NO $1,"
      test $# -eq 2 && NO="$NO $1 and" && shift
      test $# -lt 2 && NO="$NO $1"
      shift
    done
    $CAT >&2 <<ETX

*** FATAL INSTALLATION ERROR ***

The program$NO $IS required by '$CMD',
but cannot be found; '$CMD' is unable to continue.

ETX
    exit 1
  fi
# Set up temporary/intermediate file locations.
#
# GROFF_TMPDIR, if not already set, defaults to the first of TMPDIR,
# TMP or TEMP which is set, or to the current directory otherwise.
# Every file name embeds the process ID, in the form 'pdf<PID>.<ext>'.
#
  WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
#
  REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
  REFFILE=${GROFF_TMPDIR}/pdf$$.ref
#
  TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
  BD_DATA=${GROFF_TMPDIR}/pdf$$.ps
#
# Set a trap, to delete temporary files on exit.
# (FIXME: may want to include other signals, in released version).
#
  trap "rm -f ${GROFF_TMPDIR}/pdf$$.*" 0
# Initialise 'groff' format control settings,
# to discriminate table of contents and document body formatting passes.
#
# Each setting is a 'groff' option which sets the PHASE number register.
#
  TOC_FORMAT="-rPHASE=1"
  BODY_FORMAT="-rPHASE=2"
116 help reference-dictionary no-reference-dictionary
117 stylesheet pdf-output no-pdf-output
118 version report-progress no-toc-relocation
120 # Parse the command line, to identify 'pdfroff' specific options.
121 # Collect all other parameters into new argument and file lists,
122 # to be passed on to 'groff', enforcing the '-Tps' option.
124 DIFF
="" STREAM
="" INPUT_FILES
=""
125 SHOW_VERSION
="" ARGLIST
="-Tps" GROFF_STYLE
="$GROFF -Tps"
130 # Long options must be processed locally ...
134 # First identify, matching any abbreviation to its full form.
136 MATCH
="" OPTNAME
=`IFS==; set dummy $1; echo $2`
139 MATCH
="$MATCH`echo --$OPT | $GREP "^
$OPTNAME"`"
142 # For options in the form --option=value
143 # capture any specified value into $OPTARG.
145 OPTARG
=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
147 # Perform case specific processing for matched option ...
153 Usage: $CMD [-option ...] [--long-option ...] [file ...]
158 Display this usage summary, and exit.
162 Display a version identification message and exit.
165 Enable console messages, indicating the progress of the
166 PDF document formatting process.
169 Write the PDF output stream to file 'name'; if this option
170 is unspecified, standard output is used for PDF output.
173 Suppress the generation of PDF output entirely; use this
174 with the --reference-dictionary option, if processing a
175 document stream to produce only a reference dictionary.
177 --no-reference-dictionary
178 Suppress the generation of a '$CMD' reference dictionary
179 for the PDF document. Normally '$CMD' will create a
180 reference dictionary, at the start of document processing;
181 this option can accelerate processing, if it is known in
182 advance, that no reference dictionary is required.
184 --reference-dictionary=name
185 Save the document reference dictionary in file 'name'.
186 If 'name' already exists, when processing commences, it
187 will be used as the base case, from which the updated
188 dictionary will be derived. If this option is not used,
189 then the reference dictionary, created during the normal
190 execution of '$CMD', will be deleted on completion of
194 Use the file 'name' as a 'groff' style sheet, to control
195 the appearance of the document's front cover section. If
196 this option is not specified, then no special formatting
197 is applied, to create a front cover section.
200 Suppress the multiple pass 'groff' processing, which is
201 normally required to position the table of contents at the
202 start of a PDF document.
209 ARGLIST
="$ARGLIST \"$1\""
210 SHOW_VERSION
="GNU pdfroff (groff) version @VERSION@"
222 PDF_OUTPUT
="$NULLDEV"
225 --reference-dictionary)
229 --no-reference-dictionary)
230 AWK
=":" DIFF
=":" REFFILE
="$NULLDEV" REFCOPY
="$NULLDEV"
234 STYLESHEET
="$OPTARG" CS_DATA
=${GROFF_TMPDIR}/pdf$$.cs
238 TC_DATA
="" TOC_FORMAT
="" BODY_FORMAT
=""
241 # any other non-null match must have matched more than one defined case,
242 # so report the ambiguity, and bail out.
245 echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
249 # while no match at all simply represents an undefined case.
252 echo >&2 "$CMD: unknown option '$1'"
258 # A solitary hyphen, as an argument, means "stream STDIN through groff",
259 # while the "-i" option means "append STDIN stream to specified input files",
260 # so set up a mechanism to achieve this, for ALL 'groff' passes.
263 STREAM
="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
264 ARGLIST
="$ARGLIST $1" INPUT_FILES
="$INPUT_FILES $1"
267 # Those standard options which expect an argument, but are specified with
268 # an intervening space, between flag and argument, must be reparsed, so we
269 # can trap illegal use of '-T dev', or missing input files.
273 shift; set reparse
"$OPTNAME$@"
276 # Among standard options, '-Tdev' is treated as a special case.
277 # '-Tps' is automatically enforced, so if specified, is silently ignored.
281 # No other '-Tdev' option is permitted.
283 -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
287 # '-h' and '-v' options redirect to their equivalent long forms ...
289 -h*) set redirect
--help
292 -v*) shift; set redirect
--version "$@"
295 # All other standard options are simply passed through to 'groff',
296 # with no validation beforehand.
298 -*) ARGLIST
="$ARGLIST \"$1\"" GROFF_STYLE
="$GROFF_STYLE \"$1\""
301 # All non-option arguments are considered as possible input file names,
302 # and are passed on to 'groff', unaltered.
304 *) ARGLIST
="$ARGLIST \"$1\""
305 test -f "$1" && INPUT_FILES
="$INPUT_FILES \"$1\""
311 # If the '-v' or '--version' option was specified,
312 # then we simply emulate the behaviour of 'groff', with this option,
315 if test -n "$SHOW_VERSION"
317 echo >&2 "$SHOW_VERSION"
318 echo >&2; eval $GROFF $ARGLIST
# Establish how to invoke 'echo', suppressing the terminating newline.
# (Adapted from 'autoconf' code, as found in 'configure' scripts).
#
# After this probe, 'echo $n "text$c"' is the form to use; $n carries
# a leading '-n' option and $c a trailing '\c' escape, and at most one
# of them is non-empty.  When 'echo' supports neither, both are left
# empty, and the newline cannot be suppressed.
#
  case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
    *c*,*-n*) n=''   c=''   ;;
    *c*,*)    n='-n' c=''   ;;
    *)        n=''   c='\c' ;;
  esac
# If STDIN is specified among the input files,
# or if no input files are specified, then we need to capture STDIN,
# so we can replay it into each 'groff' processing pass.
#
# STREAM holds a command prefix, ending in a pipe symbol, which replays
# the captured copy; it is left empty, when STDIN is not required.
#
  test -z "$INPUT_FILES" && STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
  test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.in
338 # Unless reference resolution is explicitly suppressed,
339 # we initiate it by touching the cross reference dictionary file,
340 # and initialise the comparator, to kickstart the reference resolver loop.
346 echo kickstart
> $REFCOPY
347 test "${SHOW_PROGRESS+"set"}" = "set" && SAY
=echo
349 # In order to correctly resolve 'pdfmark' references,
350 # we need to have both the 'awk' and 'diff' programs available.
353 eval set ${GROFF_AWK_INTERPRETER-"@GROFF_AWK_INTERPRETERS@"}
356 type $1 >$NULLDEV 2>&1 && AWK
=`set :\`type $1\
` ; eval echo '$'$#`
357 test "$AWK" = ":" ||
set "$AWK"
360 type diff >$NULLDEV 2>&1 && DIFF
=`set :\`type diff\
` ; eval echo '$'$#`
361 test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO
="$NO 'awk'"
362 test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO
="$NO 'diff'"
366 SAY
=":" AWK
=":" DIFF
=":"
367 test $# -gt 1 && NO
="s $1 and $2 are" || NO
=" $1 is"
372 The program$NO required, but cannot be found;
373 consequently, '$CMD' is unable to resolve 'pdfmark' references.
375 Document processing will continue, but no 'pdfmark' reference dictionary
376 will be compiled; if any 'pdfmark' reference appears in the resulting PDF
377 document, the formatting may not be correct.
383 # Run the multi-pass 'pdfmark' reference resolver loop ...
385 $SAY >&2 $n Resolving references ..
$c
386 until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
389 # until all references are resolved, to yield consistent values
390 # in each of two consecutive passes, or until it seems that no consistent
391 # resolution is achievable.
394 PASS_INDICATOR
="${PASS_INDICATOR}."
395 if test "$PASS_INDICATOR" = "...."
398 # More than three passes required indicates a probable inconsistency
399 # in the source document; diagnose, and bail out.
403 $CMD: unable to resolve references consistently after three passes
404 $CMD: the source document may exhibit instability about the reference(s) ...
407 # Report the unresolved references, as a diff between the two pass files,
408 # preferring 'unified' or 'context' diffs, when available
411 $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT
='-c0'
412 $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT
='-u0'
413 $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
417 # Replace the comparison file copy from any previous pass,
418 # with the most recently updated copy of the reference dictionary.
419 # (Some versions of 'mv' may not support overwriting of an existing file,
420 # so remove the old comparison file first).
425 # Run 'groff' and 'awk', to identify reference marks in the document source,
426 # filtering them into the reference dictionary; discard incomplete 'groff' output
429 eval $STREAM $GROFF -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $ARGLIST
430 $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
435 # We MUST have resolved all 'pdfmark' references, such that the content of the
436 # updated reference dictionary file EXACTLY matches the last saved copy.
# If PDF output has been suppressed, then there is nothing more to do.
# (Suppression is indicated by PDF_OUTPUT naming the NULL device).
#
  test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
# We are now ready to start preparing the intermediate PostScript files,
# from which the PDF output will be compiled -- but before proceeding further ...
# let's make sure we have a GhostScript interpreter to convert them!
#
# The candidate names are loaded into the positional parameters, and
# tried in order; GS keeps the ':' sentinel, until one is found.
# The 'dummy' word ensures that 'set' always has an argument, because
# a bare 'set' would list every shell variable on standard output.
# On success, the parameters are replaced by the single located
# program, so that the following 'shift' terminates the search.
#
  GS=':'
  eval set dummy ${GROFF_GHOSTSCRIPT_INTERPRETER-"@GROFF_GHOSTSCRIPT_INTERPRETERS@"}
  shift
  while test $# -gt 0
  do
    type $1 >"$NULLDEV" 2>&1 && GS=`set :\`type $1\` ; eval echo '$'$#`
    test "$GS" = ":" || set "$GS"
    shift
  done
# If we could not find a GhostScript interpreter, then we can do no more.
#
# GS still holding the ':' sentinel means that no interpreter was found;
# a brief diagnostic is followed by the full explanation, and exit.
#
  if test "$GS" = ":"
  then
    echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
    $CAT >&2 <<ETX

*** FATAL INSTALLATION ERROR ***

'$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
Since you do not appear to have one installed, '$CMD' cannot continue.

ETX
    exit 1
  fi
# We now extend the local copy of the reference dictionary file,
# to create a full 'pdfmark' reference map for the document ...
#
# Each 'grohtml-info' record in the work file is appended to the copy,
# as a '.pdfhref Z' request carrying the record's fields 2, 3 and 4.
#
  $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' "$WRKFILE" >> "$REFCOPY"
# Re-enable progress reporting, if necessary ...
# (Missing 'awk' or 'diff' may have disabled it, to avoid display
# of spurious messages associated with reference resolution).
#
# Only whether SHOW_PROGRESS is set matters here, not its value.
#
  test "${SHOW_PROGRESS+set}" = "set" && SAY=echo
482 # If a document cover style sheet is specified ...
483 # then we run a special formatting pass, to create a cover section file.
485 if test -n "$STYLESHEET"
488 $SAY >&2 $n "Formatting document ... front cover section ..$c"
489 CS_FILTER
="$STREAM $SED -n '/$DOT${CS_MACRO-"CS"}/,/$DOT${CE_MACRO-"CE"}/p'"
490 eval $CS_FILTER $INPUT_FILES |
eval $GROFF_STYLE $STYLESHEET - > $CS_DATA
494 # If table of contents relocation is to be performed (it is, by default),
495 # then we run an extra 'groff' pass, to format a TOC intermediate file.
497 if test -n "$TC_DATA"
499 $SAY >&2 $n "Formatting document ... table of contents ..$c"
500 eval $STREAM $GROFF $TOC_FORMAT $REFCOPY $ARGLIST > $TC_DATA
# In all cases, a final 'groff' pass is required, to format the document body.
#
# The command is assembled from STREAM, GROFF, BODY_FORMAT, REFCOPY and
# ARGLIST, and is run through 'eval', so that the pipe symbol in STREAM
# and the quoting embedded in ARGLIST are interpreted by the shell.
#
  $SAY >&2 $n "Formatting document ... body section ..$c"
  eval $STREAM $GROFF $BODY_FORMAT $REFCOPY $ARGLIST > $BD_DATA
511 # Invoke GhostScript as a PDF writer, to bind all of the generated
512 # PostScript intermediate files into a single PDF output file.
514 $SAY >&2 $n "Writing PDF output ..$c"
515 PDFWRITE
="$GS -dQUIET -dBATCH -dNOPAUSE -sDEVICE=pdfwrite"
517 # (This 'sed' script is a hack, to eliminate redundant blank pages).
521 /%%EndPageSetup/b finish
529 /^%%Page:.*0 *Cg *EP/d
530 ' $TC_DATA $BD_DATA |
$PDFWRITE -sOutputFile=${PDF_OUTPUT-"-"} $CS_DATA -
533 # ------------------------------------------------------------------------------
534 # $Source: end of file