2 # ------------------------------------------------------------------------------
4 # Function: Format PDF Output from groff Markup
6 # Copyright (C) 2005, Free Software Foundation, Inc.
7 # Written by Keith Marshall (keith.d.marshall@ntlworld.com)
9 # This file is part of groff.
11 # groff is free software; you can redistribute it and/or modify it under
12 # the terms of the GNU General Public License as published by the Free
13 # Software Foundation; either version 2, or (at your option) any later
16 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
17 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 # You should have received a copy of the GNU General Public License along
22 # with groff; see the file COPYING. If not, write to the Free Software
23 # Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 # ------------------------------------------------------------------------------
# Set up an identifier for the NULL device.
# In most cases "/dev/null" will be correct, but some shells on
# MS-DOS/MS-Windows systems may require us to use "NUL".
#
# $NULLDEV is quoted, so that an empty or unset value fails the test
# (and selects "NUL"), instead of collapsing to a bare 'test -c'.
test -c "$NULLDEV" || NULLDEV="NUL"
# Set up the command name to use in diagnostic messages.
# (We can't assume we have 'basename', so use the full path if required.
# Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
#
# "$0" is quoted, so that a script path containing white space is passed
# to 'basename', and stored by the fallback, as a single word.
CMD=`exec 2>$NULLDEV; basename "$0"` || CMD="$0"
# We need both 'grep' and 'sed' programs, to parse script options,
# and we also need 'cat', to display help and some error messages,
# so ensure they are all installed, before we continue.
#
# Each lookup runs inside a command substitution, with stderr redirected
# to $NULLDEV.  'set' loads the words printed by 'type' as positional
# parameters, with ":" glued to the front of the first one, and the
# 'eval' then echoes the last parameter.  The result is therefore the
# final word of the 'type' report, or a bare ":" when 'type' printed
# nothing on stdout.
CAT=`exec 2>$NULLDEV ; set :\`type cat\` ; eval echo '$'$#`
GREP=`exec 2>$NULLDEV ; set :\`type grep\` ; eval echo '$'$#`
SED=`exec 2>$NULLDEV ; set :\`type sed\` ; eval echo '$'$#`
# Another fundamental requirement is the 'groff' program itself;
# we will prefer any version existing in a specified GROFF_BIN_DIR,
# or, if unspecified, the installed location of 'groff' programs;
# (we DO NOT use a PATH search, to locate 'groff').
#
# GBIN takes $GROFF_BIN_DIR when that variable is set (even if empty),
# and the configured "@GROFF_BIN_DIR@" placeholder otherwise.  GROFF is
# then the final word which 'type' reports for $GBIN/groff, or a bare
# ":" when 'type' prints nothing on stdout.
GBIN=${GROFF_BIN_DIR-"@GROFF_BIN_DIR@"}
GROFF=`exec 2>$NULLDEV ; set :\`type $GBIN/groff\` ; eval echo '$'$#`
# If one or more of these is missing, diagnose and bail out.
#
# NOPROG is the common prefix of each diagnostic.  A lookup result of
# ":" marks a program which was not found; each such program is reported
# on stderr, and its quoted name is appended to $NO.
NOPROG="$CMD: installation problem: cannot find program"
test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GBIN" && NO="$NO 'groff'"
test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
67 test $# -gt 1 && NO
="s" IS
="are" || NO
='' IS
="is"
70 test $# -gt 2 && NO
="$NO $1,"
71 test $# -eq 2 && NO
="$NO $1 and" && shift
72 test $# -lt 2 && NO
="$NO $1"
77 *** FATAL INSTALLATION ERROR ***
79 The program$NO $IS required by '$CMD',
80 but cannot be found; '$CMD' is unable to continue.
# Set up temporary/intermediate file locations.
#
# The first expansion also assigns GROFF_TMPDIR, when it is not already
# set, from the first of TMPDIR, TMP or TEMP which is set, defaulting to
# the current directory.  Every intermediate file shares the "pdf$$."
# prefix within that directory, and differs only in its suffix.
WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
REFFILE=${GROFF_TMPDIR}/pdf$$.ref
TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
BD_DATA=${GROFF_TMPDIR}/pdf$$.ps
# Set a trap, to delete temporary files on exit.
# (FIXME: may want to include other signals, in released version).
#
# The action is single-quoted, so ${GROFF_TMPDIR} is expanded only when
# the trap fires, and stays quoted at that point; a directory name which
# contains white space is then still treated as one path component.
trap 'rm -f "${GROFF_TMPDIR}"/pdf$$.*' 0
# Initialise 'groff' format control settings,
# to discriminate table of contents and document body formatting passes.
#
# Each setting is a 'groff' command line option which assigns the PHASE
# register: 1 for the table of contents pass, 2 for the body pass.
TOC_FORMAT="-rPHASE=1"
BODY_FORMAT="-rPHASE=2"
109 help reference-dictionary no-reference-dictionary
110 stylesheet pdf-output no-pdf-output
111 version report-progress no-toc-relocation
# Parse the command line, to identify 'pdfroff' specific options.
# Collect all other parameters into new argument and file lists,
# to be passed on to 'groff', enforcing the '-Tps' option.
#
# Start from a clean state: nothing selected yet, and both the 'groff'
# argument list and the style sheet command already carrying '-Tps'.
DIFF=""
STREAM=""
INPUT_FILES=""
SHOW_VERSION=""
ARGLIST="-Tps"
GROFF_STYLE="$GROFF -Tps"
123 # Long options must be processed locally ...
127 # First identify, matching any abbreviation to its full form.
129 MATCH
="" OPTNAME
=`IFS==; set dummy $1; echo $2`
132 MATCH
="$MATCH`echo --$OPT | $GREP "^
$OPTNAME"`"
135 # For options in the form --option=value
136 # capture any specified value into $OPTARG.
138 OPTARG
=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
140 # Perform case specific processing for matched option ...
146 Usage: $CMD [-option ...] [--long-option ...] [file ...]
151 Display this usage summary, and exit.
155 Display a version identification message and exit.
158 Enable console messages, indicating the progress of the
159 PDF document formatting process.
162 Write the PDF output stream to file 'name'; if this option
163 is unspecified, standard output is used for PDF output.
166 Suppress the generation of PDF output entirely; use this
167 with the --reference-dictionary option, if processing a
168 document stream to produce only a reference dictionary.
170 --no-reference-dictionary
171 Suppress the generation of a '$CMD' reference dictionary
172 for the PDF document. Normally '$CMD' will create a
173 reference dictionary, at the start of document processing;
174 this option can accelerate processing, if it is known in
175 advance, that no reference dictionary is required.
177 --reference-dictionary=name
178 Save the document reference dictionary in file 'name'.
179 If 'name' already exists, when processing commences, it
180 will be used as the base case, from which the updated
181 dictionary will be derived. If this option is not used,
182 then the reference dictionary, created during the normal
183 execution of '$CMD', will be deleted on completion of
187 Use the file 'name' as a 'groff' style sheet, to control
188 the appearance of the document's front cover section. If
189 this option is not specified, then no special formatting
190 is applied, to create a front cover section.
193 Suppress the multiple pass 'groff' processing, which is
194 normally required to position the table of contents at the
195 start of a PDF document.
202 ARGLIST
="$ARGLIST \"$1\""
203 SHOW_VERSION
="GNU pdfroff (groff) version @VERSION@"
215 PDF_OUTPUT
="$NULLDEV"
218 --reference-dictionary)
222 --no-reference-dictionary)
223 DIFF
=":" REFFILE
="$NULLDEV" REFCOPY
="$NULLDEV"
227 STYLESHEET
="$OPTARG" CS_DATA
=${GROFF_TMPDIR}/pdf$$.cs
231 TC_DATA
="" TOC_FORMAT
="" BODY_FORMAT
=""
234 # any other non-null match must have matched more than one defined case,
235 # so report the ambiguity, and bail out.
238 echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
242 # while no match at all simply represents an undefined case.
245 echo >&2 "$CMD: unknown option '$1'"
251 # A solitary hyphen, as an argument, means "stream STDIN through groff",
252 # while the "-i" option means "append STDIN stream to specified input files",
253 # so set up a mechanism to achieve this, for ALL 'groff' passes.
256 STREAM
="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
257 ARGLIST
="$ARGLIST $1" INPUT_FILES
="$INPUT_FILES $1"
260 # Those standard options which expect an argument, but are specified with
261 # an intervening space, between flag and argument, must be reparsed, so we
262 # can trap illegal use of '-T dev', or missing input files.
266 shift; set reparse
"$OPTNAME$@"
269 # Among standard options, '-Tdev' is treated as a special case.
270 # '-Tps' is automatically enforced, so if specified, is silently ignored.
274 # No other '-Tdev' option is permitted.
276 -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
280 # '-h' and '-v' options redirect to their equivalent long forms ...
282 -h*) set redirect
--help
285 -v*) shift; set redirect
--version "$@"
288 # All other standard options are simply passed through to 'groff',
289 # with no validation beforehand.
291 -*) ARGLIST
="$ARGLIST \"$1\"" GROFF_STYLE
="$GROFF_STYLE \"$1\""
294 # All non-option arguments are considered as possible input file names,
295 # and are passed on to 'groff', unaltered.
297 *) ARGLIST
="$ARGLIST \"$1\""
298 test -f "$1" && INPUT_FILES
="$INPUT_FILES \"$1\""
304 # If the '-v' or '--version' option was specified,
305 # then we simply emulate the behaviour of 'groff', with this option,
308 if test -n "$SHOW_VERSION"
310 echo >&2 "$SHOW_VERSION"
311 echo >&2; eval $GROFF $ARGLIST
315 # Establish how to invoke 'echo', suppressing the terminating newline.
316 # (Adapted from 'autoconf' code, as found in 'configure' scripts).
318 case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
319 *c
*,*-n*) n
='' c
='' ;;
324 # If STDIN is specified among the input files,
325 # or if no input files are specified, then we need to capture STDIN,
326 # so we can replay it into each 'groff' processing pass.
328 test -z "$INPUT_FILES" && STREAM
="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
329 test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.
in
331 # Unless reference resolution is explicitly suppressed,
332 # we initiate it by touching the cross reference dictionary file,
333 # and initialise the comparator, to kickstart the reference resolver loop.
339 echo kickstart
> $REFCOPY
340 test "${SHOW_PROGRESS+"set"}" = "set" && SAY
=echo
342 # In order to correctly resolve 'pdfmark' references,
343 # we need to have both the 'awk' and 'diff' programs available.
346 set ${GROFF_AWK_INTERPRETER-"@GROFF_AWK_INTERPRETERS@"}
349 AWK
=`exec 2>$NULLDEV ; set :\`type $1\
` ; eval echo '$'$#`
350 test "$AWK" = ":" ||
set "$AWK"
353 DIFF
=`exec 2>$NULLDEV ; set :\`type diff\
` ; eval echo '$'$#`
354 test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO
="$NO 'awk'"
355 test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO
="$NO 'diff'"
359 SAY
=":" AWK
=":" DIFF
=":"
360 test $# -gt 1 && NO
="s $1 and $2 are" || NO
=" $1 is"
365 The program$NO required, but cannot be found;
366 consequently, '$CMD' is unable to resolve 'pdfmark' references.
368 Document processing will continue, but no 'pdfmark' reference dictionary
369 will be compiled; if any 'pdfmark' reference appears in the resulting PDF
370 document, the formatting may not be correct.
376 # Run the multi-pass 'pdfmark' reference resolver loop ...
378 $SAY >&2 $n Resolving references ..
$c
379 until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
382 # until all references are resolved, to yield consistent values
383 # in each of two consecutive passes, or until it seems that no consistent
384 # resolution is achievable.
387 PASS_INDICATOR
="${PASS_INDICATOR}."
388 if test "$PASS_INDICATOR" = "...."
391 # More than three passes required indicates a probable inconsistency
392 # in the source document; diagnose, and bail out.
396 $CMD: unable to resolve references consistently after three passes
397 $CMD: the source document may exhibit instability about the reference(s) ...
400 # Report the unresolved references, as a diff between the two pass files,
401 # preferring 'unified' or 'context' diffs, when available
404 $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT
='-c0'
405 $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT
='-u0'
406 $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
410 # Replace the comparison file copy from any previous pass,
411 # with the most recently updated copy of the reference dictionary.
412 # (Some versions of 'mv' may not support overwriting of an existing file,
413 # so remove the old comparison file first).
418 # Run 'groff' and 'awk', to identify reference marks in the document source,
419 # filtering them into the reference dictionary; discard incomplete 'groff' output
422 eval $STREAM $GROFF -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $ARGLIST
423 $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
428 # We MUST have resolved all 'pdfmark' references, such that the content of the
429 # updated reference dictionary file EXACTLY matches the last saved copy.
431 # If PDF output has been suppressed, then there is nothing more to do.
433 test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
435 # We are now ready to start preparing the intermediate PostScript files,
436 # from which the PDF output will be compiled -- but before proceeding further ...
437 # let's make sure we have a GhostScript interpreter to convert them!
439 set ${GROFF_GHOSTSCRIPT_INTERPRETER-"@GROFF_GHOSTSCRIPT_INTERPRETERS@"}
442 GS
=`exec 2>$NULLDEV ; set :\`type $1\
` ; eval echo '$'$#`
443 test "$GS" = ":" ||
set "$GS"
447 # If we could not find a GhostScript interpreter, then we can do no more.
451 echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
454 *** FATAL INSTALLATION ERROR ***
456 '$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
457 Since you do not appear to have one installed, '$CMD' cannot continue.
463 # We now extend the local copy of the reference dictionary file,
464 # to create a full 'pdfmark' reference map for the document ...
466 $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' $WRKFILE >> $REFCOPY
# Re-enable progress reporting, if necessary ...
# (Missing 'awk' or 'diff' may have disabled it, to avoid display
# of spurious messages associated with reference resolution).
#
# SAY becomes 'echo' whenever SHOW_PROGRESS is set, even to an empty
# value; otherwise SAY is left exactly as it was.
test "${SHOW_PROGRESS+set}" = "set" && SAY=echo
474 # If a document cover style sheet is specified ...
475 # then we run a special formatting pass, to create a cover section file.
477 if test -n "$STYLESHEET"
480 $SAY >&2 $n "Formatting document ... front cover section ..$c"
481 CS_FILTER
="$STREAM $SED -n '/$DOT${CS_MACRO-"CS"}/,/$DOT${CE_MACRO-"CE"}/p'"
482 eval $CS_FILTER $INPUT_FILES |
eval $GROFF_STYLE $STYLESHEET - > $CS_DATA
486 # If table of contents relocation is to be performed (it is, by default),
487 # then we run an extra 'groff' pass, to format a TOC intermediate file.
489 if test -n "$TC_DATA"
491 $SAY >&2 $n "Formatting document ... table of contents ..$c"
492 eval $STREAM $GROFF $TOC_FORMAT $REFCOPY $ARGLIST > $TC_DATA
496 # In all cases, a final 'groff' pass is required, to format the document body.
498 $SAY >&2 $n "Formatting document ... body section ..$c"
499 eval $STREAM $GROFF $BODY_FORMAT $REFCOPY $ARGLIST > $BD_DATA
503 # Invoke GhostScript as a PDF writer, to bind all of the generated
504 # PostScript intermediate files into a single PDF output file.
506 $SAY >&2 $n "Writing PDF output ..$c"
507 PDFWRITE
="$GS -dQUIET -dBATCH -dNOPAUSE -sDEVICE=pdfwrite"
509 # (This 'sed' script is a hack, to eliminate redundant blank pages).
513 /%%EndPageSetup/b finish
521 /^%%Page:.*0 *Cg *EP/d
522 ' $TC_DATA $BD_DATA |
$PDFWRITE -sOutputFile=${PDF_OUTPUT-"-"} $CS_DATA -
525 # ------------------------------------------------------------------------------
526 # $Source: end of file