Provide `pdfroff' shell script, and manpage to document it;
[s-roff.git] / contrib / pdfmark / pdfroff.sh
blob1313b7e35d26bb6dbfb0b40a8afe379379f26ace
1 #!/bin/sh
2 # ------------------------------------------------------------------------------
4 # Function: Format PDF Output from groff Markup
6 # Copyright (C) 2005, Free Software Foundation, Inc.
7 # Written by Keith Marshall (keith.d.marshall@ntlworld.com)
8 #
9 # This file is part of groff.
11 # groff is free software; you can redistribute it and/or modify it under
12 # the terms of the GNU General Public License as published by the Free
13 # Software Foundation; either version 2, or (at your option) any later
14 # version.
16 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
17 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 # for more details.
21 # You should have received a copy of the GNU General Public License along
22 # with groff; see the file COPYING. If not, write to the Free Software
23 # Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 # ------------------------------------------------------------------------------
# Set up an identifier for the NULL device.
# In most cases "/dev/null" will be correct, but some shells on
# MS-DOS/MS-Windows systems may require us to use "NUL".
#
# The fallback has to assign NULLDEV itself: every later redirection in
# this script reads $NULLDEV, so assigning to any other name would leave
# the unusable "/dev/null" value in place on such systems.
#
  NULLDEV="/dev/null"
  test -c "$NULLDEV" || NULLDEV="NUL"

# Set up the command name to use in diagnostic messages.
# (We can't assume we have 'basename', so use the full path if required.
# Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
#
# "$0" is quoted, so that an invocation path containing white space is
# handed to 'basename' as a single argument.
#
  CMD=`exec 2>"$NULLDEV"; basename "$0"` || CMD=$0
40 # We need both 'grep' and 'sed' programs, to parse script options,
41 # and we also need 'cat', to display help and some error messages,
42 # so ensure they are all installed, before we continue.
44 CAT=`exec 2>$NULLDEV ; set :\`type cat\` ; eval echo '$'$#`
45 GREP=`exec 2>$NULLDEV ; set :\`type grep\` ; eval echo '$'$#`
46 SED=`exec 2>$NULLDEV ; set :\`type sed\` ; eval echo '$'$#`
48 # Another fundamental requirement is the 'groff' program itself;
49 # we will prefer any version existing in a specified GROFF_BIN_DIR,
50 # or, if unspecified, the installed location of 'groff' programs;
51 # (we DO NOT use a PATH search, to locate 'groff').
53 GBIN=${GROFF_BIN_DIR-"@GROFF_BIN_DIR@"}
54 GROFF=`exec 2>$NULLDEV ; set :\`type $GBIN/groff\` ; eval echo '$'$#`
56 # If one or more of these is missing, diagnose and bail out.
58 NO=''
59 NOPROG="$CMD: installation problem: cannot find program"
60 test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
61 test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
62 test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GBIN" && NO="$NO 'groff'"
63 test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
64 if test -n "$NO"
65 then
66 set $NO
67 test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
68 while test $# -gt 0
70 test $# -gt 2 && NO="$NO $1,"
71 test $# -eq 2 && NO="$NO $1 and" && shift
72 test $# -lt 2 && NO="$NO $1"
73 shift
74 done
75 $CAT >&2 <<-ETX
77 *** FATAL INSTALLATION ERROR ***
79 The program$NO $IS required by '$CMD',
80 but cannot be found; '$CMD' is unable to continue.
82 ETX
83 exit 1
# Set up temporary/intermediate file locations.
#
# GROFF_TMPDIR is taken from the environment when already set; otherwise
# it is assigned here from the first of TMPDIR, TMP or TEMP which is set,
# falling back to the current directory.  Every intermediate file shares
# the "pdf$$." prefix, so that one glob can remove them all.
#
  WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
#
  REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
  REFFILE=${GROFF_TMPDIR}/pdf$$.ref
#
  CS_DATA=""
  TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
  BD_DATA=${GROFF_TMPDIR}/pdf$$.ps

# Set a trap, to delete temporary files on exit.
# The removal command is expanded now (double quotes), so it always names
# the files created by this process.
#
  trap "rm -f ${GROFF_TMPDIR}/pdf$$.*" 0

# Not every shell runs the exit trap when it is killed by a signal, so
# convert HUP, INT, QUIT, PIPE and TERM into an ordinary 'exit'; that in
# turn fires the clean-up trap registered above.
#
  trap "exit 1" 1 2 3 13 15
102 # Initialise 'groff' format control settings,
103 # to discriminate table of contents and document body formatting passes.
105 TOC_FORMAT="-rPHASE=1"
106 BODY_FORMAT="-rPHASE=2"
108 LONGOPTS="
109 help reference-dictionary no-reference-dictionary
110 stylesheet pdf-output no-pdf-output
111 version report-progress no-toc-relocation
113 # Parse the command line, to identify 'pdfroff' specific options.
114 # Collect all other parameters into new argument and file lists,
115 # to be passed on to 'groff', enforcing the '-Tps' option.
117 DIFF="" STREAM="" INPUT_FILES=""
118 SHOW_VERSION="" ARGLIST="-Tps" GROFF_STYLE="$GROFF -Tps"
119 while test $# -gt 0
121 case "$1" in
123 # Long options must be processed locally ...
125 --*)
127 # First identify, matching any abbreviation to its full form.
129 MATCH="" OPTNAME=`IFS==; set dummy $1; echo $2`
130 for OPT in $LONGOPTS
132 MATCH="$MATCH`echo --$OPT | $GREP "^$OPTNAME"`"
133 done
135 # For options in the form --option=value
136 # capture any specified value into $OPTARG.
138 OPTARG=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
140 # Perform case specific processing for matched option ...
142 case "$MATCH" in
144 --help)
145 $CAT >&2 <<-ETX
146 Usage: $CMD [-option ...] [--long-option ...] [file ...]
148 Options:
150 --help
151 Display this usage summary, and exit.
154 --version
155 Display a version identification message and exit.
157 --report-progress
158 Enable console messages, indicating the progress of the
159 PDF document formatting process.
161 --pdf-output=name
162 Write the PDF output stream to file 'name'; if this option
163 is unspecified, standard output is used for PDF output.
165 --no-pdf-output
166 Suppress the generation of PDF output entirely; use this
167 with the --reference-dictionary option, if processing a
168 document stream to produce only a reference dictionary.
170 --no-reference-dictionary
171 Suppress the generation of a '$CMD' reference dictionary
172 for the PDF document. Normally '$CMD' will create a
173 reference dictionary, at the start of document processing;
174 this option can accelerate processing, if it is known in
175 advance, that no reference dictionary is required.
177 --reference-dictionary=name
178 Save the document reference dictionary in file 'name'.
179 If 'name' already exists, when processing commences, it
180 will be used as the base case, from which the updated
181 dictionary will be derived. If this option is not used,
182 then the reference dictionary, created during the normal
183 execution of '$CMD', will be deleted on completion of
184 document processing.
186 --stylesheet=name
187 Use the file 'name' as a 'groff' style sheet, to control
188 the appearance of the document's front cover section. If
189 this option is not specified, then no special formatting
190 is applied, to create a front cover section.
192 --no-toc-relocation
193 Suppress the multiple pass 'groff' processing, which is
194 normally required to position the table of contents at the
195 start of a PDF document.
198 exit 0
201 --version)
202 ARGLIST="$ARGLIST \"$1\""
203 SHOW_VERSION="GNU pdfroff (groff) version @VERSION@"
206 --report-progress)
207 SHOW_PROGRESS=echo
210 --pdf-output)
211 PDF_OUTPUT="$OPTARG"
214 --no-pdf-output)
215 PDF_OUTPUT="$NULLDEV"
218 --reference-dictionary)
219 REFFILE="$OPTARG"
222 --no-reference-dictionary)
223 DIFF=":" REFFILE="$NULLDEV" REFCOPY="$NULLDEV"
226 --stylesheet)
227 STYLESHEET="$OPTARG" CS_DATA=${GROFF_TMPDIR}/pdf$$.cs
230 --no-toc-relocation)
231 TC_DATA="" TOC_FORMAT="" BODY_FORMAT=""
234 # any other non-null match must have matched more than one defined case,
235 # so report the ambiguity, and bail out.
237 --*)
238 echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
239 exit 1
242 # while no match at all simply represents an undefined case.
245 echo >&2 "$CMD: unknown option '$1'"
246 exit 1
248 esac
251 # A solitary hyphen, as an argument, means "stream STDIN through groff",
252 # while the "-i" option means "append STDIN stream to specified input files",
253 # so set up a mechanism to achieve this, for ALL 'groff' passes.
255 - | -i*)
256 STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
257 ARGLIST="$ARGLIST $1" INPUT_FILES="$INPUT_FILES $1"
# Those standard options which expect an argument, but are specified with
# an intervening space, between flag and argument, must be reparsed, so we
# can trap illegal use of '-T dev', or missing input files.
#
# The flag is glued onto the argument which follows it, and the pair is
# pushed back behind a placeholder word; the 'shift' at the bottom of the
# loop discards the placeholder, so the joined form is parsed next.
# When the flag is the LAST argument there is nothing to join it to, and
# pushing it back would present the same bare flag again on every pass,
# so that case is diagnosed here instead of looping for ever.
#
    -[dfFILmMnoPrTwW])
      OPTNAME="$1"
      shift
      if test $# -eq 0
      then
        echo >&2 "$CMD: option '$OPTNAME' requires an argument"
        exit 1
      fi
      set reparse "$OPTNAME$@"
      ;;
269 # Among standard options, '-Tdev' is treated as a special case.
270 # '-Tps' is automatically enforced, so if specified, is silently ignored.
272 -Tps) ;;
274 # No other '-Tdev' option is permitted.
276 -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
277 exit 1
280 # '-h' and '-v' options redirect to their equivalent long forms ...
282 -h*) set redirect --help
285 -v*) shift; set redirect --version "$@"
288 # All other standard options are simply passed through to 'groff',
289 # with no validation beforehand.
291 -*) ARGLIST="$ARGLIST \"$1\"" GROFF_STYLE="$GROFF_STYLE \"$1\""
294 # All non-option arguments are considered as possible input file names,
295 # and are passed on to 'groff', unaltered.
297 *) ARGLIST="$ARGLIST \"$1\""
298 test -f "$1" && INPUT_FILES="$INPUT_FILES \"$1\""
300 esac
301 shift
302 done
304 # If the '-v' or '--version' option was specified,
305 # then we simply emulate the behaviour of 'groff', with this option,
306 # and quit.
308 if test -n "$SHOW_VERSION"
309 then
310 echo >&2 "$SHOW_VERSION"
311 echo >&2; eval $GROFF $ARGLIST
312 exit $?
315 # Establish how to invoke 'echo', suppressing the terminating newline.
316 # (Adapted from 'autoconf' code, as found in 'configure' scripts).
318 case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
319 *c*,*-n*) n='' c='' ;;
320 *c*) n='-n' c='' ;;
321 *) n='' c='\c' ;;
322 esac
324 # If STDIN is specified among the input files,
325 # or if no input files are specified, then we need to capture STDIN,
326 # so we can replay it into each 'groff' processing pass.
328 test -z "$INPUT_FILES" && STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
329 test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.in
331 # Unless reference resolution is explicitly suppressed,
332 # we initiate it by touching the cross reference dictionary file,
333 # and initialise the comparator, to kickstart the reference resolver loop.
335 SAY=":"
336 if test -z "$DIFF"
337 then
338 >> $REFFILE
339 echo kickstart > $REFCOPY
340 test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
342 # In order to correctly resolve 'pdfmark' references,
343 # we need to have both the 'awk' and 'diff' programs available.
345 NO=''
346 set ${GROFF_AWK_INTERPRETER-"@GROFF_AWK_INTERPRETERS@"}
347 while test $# -gt 0
349 AWK=`exec 2>$NULLDEV ; set :\`type $1\` ; eval echo '$'$#`
350 test "$AWK" = ":" || set "$AWK"
351 shift
352 done
353 DIFF=`exec 2>$NULLDEV ; set :\`type diff\` ; eval echo '$'$#`
354 test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO="$NO 'awk'"
355 test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO="$NO 'diff'"
356 if test -n "$NO"
357 then
358 set $NO
359 SAY=":" AWK=":" DIFF=":"
360 test $# -gt 1 && NO="s $1 and $2 are" || NO=" $1 is"
361 $CAT >&2 <<-ETX
363 *** WARNING ***
365 The program$NO required, but cannot be found;
366 consequently, '$CMD' is unable to resolve 'pdfmark' references.
368 Document processing will continue, but no 'pdfmark' reference dictionary
369 will be compiled; if any 'pdfmark' reference appears in the resulting PDF
370 document, the formatting may not be correct.
376 # Run the multi-pass 'pdfmark' reference resolver loop ...
378 $SAY >&2 $n Resolving references ..$c
379 until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
382 # until all references are resolved, to yield consistent values
383 # in each of two consecutive passes, or until it seems that no consistent
384 # resolution is achievable.
386 $SAY >&2 $n .$c
387 PASS_INDICATOR="${PASS_INDICATOR}."
388 if test "$PASS_INDICATOR" = "...."
389 then
391 # More than three passes required indicates a probable inconsistency
392 # in the source document; diagnose, and bail out.
394 $SAY >&2 " failed"
395 $CAT >&2 <<-ETX
396 $CMD: unable to resolve references consistently after three passes
397 $CMD: the source document may exhibit instability about the reference(s) ...
400 # Report the unresolved references, as a diff between the two pass files,
401 # preferring 'unified' or 'context' diffs, when available
403 DIFFOPT=''
404 $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-c0'
405 $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-u0'
406 $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
407 exit 1
410 # Replace the comparison file copy from any previous pass,
411 # with the most recently updated copy of the reference dictionary.
412 # (Some versions of 'mv' may not support overwriting of an existing file,
413 # so remove the old comparison file first).
415 rm -f $REFCOPY
416 mv $REFFILE $REFCOPY
418 # Run 'groff' and 'awk', to identify reference marks in the document source,
419 # filtering them into the reference dictionary; discard incomplete 'groff' output
420 # at this stage.
422 eval $STREAM $GROFF -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $ARGLIST
423 $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
424 done
425 $SAY >&2 " done"
427 # To get to here ...
428 # We MUST have resolved all 'pdfmark' references, such that the content of the
429 # updated reference dictionary file EXACTLY matches the last saved copy.
431 # If PDF output has been suppressed, then there is nothing more to do.
433 test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
# We are now ready to start preparing the intermediate PostScript files,
# from which the PDF output will be compiled -- but before proceeding further ...
# let's make sure we have a GhostScript interpreter to convert them!
#
# GS starts out as ":" (the "not found" marker used throughout this
# script), so that an empty candidate list is diagnosed below, rather than
# leaving GS unset.  The 'set dummy ...; shift' idiom guards against the
# same empty list, which would otherwise make a bare 'set' print every
# shell variable on standard output.
#
  GS=":"
  set dummy ${GROFF_GHOSTSCRIPT_INTERPRETER-"@GROFF_GHOSTSCRIPT_INTERPRETERS@"}
  shift
  while test $# -gt 0
  do
    # The last word printed by 'type' is kept; ":" results when 'type'
    # prints nothing, i.e. when the candidate cannot be found.
    GS=`exec 2>$NULLDEV ; set :\`type $1\` ; eval echo '$'$#`
    # On success, collapse the candidate list to the one hit, so that the
    # 'shift' which follows empties the list and ends the search.
    test "$GS" = ":" || set "$GS"
    shift
  done

# If we could not find a GhostScript interpreter, then we can do no more.
#
  if test "$GS" = ":"
  then
    echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
    $CAT >&2 <<-ETX
*** FATAL INSTALLATION ERROR ***
'$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
Since you do not appear to have one installed, '$CMD' cannot continue.
ETX
    exit 1
  fi
463 # We now extend the local copy of the reference dictionary file,
464 # to create a full 'pdfmark' reference map for the document ...
466 $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' $WRKFILE >> $REFCOPY
468 # Re-enable progress reporting, if necessary ...
469 # (Missing 'awk' or 'diff' may have disabled it, to avoid display
470 # of spurious messages associated with reference resolution).
472 test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
474 # If a document cover style sheet is specified ...
475 # then we run a special formatting pass, to create a cover section file.
477 if test -n "$STYLESHEET"
478 then
479 DOT='^\.[ ]*'
480 $SAY >&2 $n "Formatting document ... front cover section ..$c"
481 CS_FILTER="$STREAM $SED -n '/$DOT${CS_MACRO-"CS"}/,/$DOT${CE_MACRO-"CE"}/p'"
482 eval $CS_FILTER $INPUT_FILES | eval $GROFF_STYLE $STYLESHEET - > $CS_DATA
483 $SAY >&2 ". done"
486 # If table of contents relocation is to be performed (it is, by default),
487 # then we run an extra 'groff' pass, to format a TOC intermediate file.
489 if test -n "$TC_DATA"
490 then
491 $SAY >&2 $n "Formatting document ... table of contents ..$c"
492 eval $STREAM $GROFF $TOC_FORMAT $REFCOPY $ARGLIST > $TC_DATA
493 $SAY >&2 ". done"
496 # In all cases, a final 'groff' pass is required, to format the document body.
498 $SAY >&2 $n "Formatting document ... body section ..$c"
499 eval $STREAM $GROFF $BODY_FORMAT $REFCOPY $ARGLIST > $BD_DATA
500 $SAY >&2 ". done"
502 # Finally ...
503 # Invoke GhostScript as a PDF writer, to bind all of the generated
504 # PostScript intermediate files into a single PDF output file.
506 $SAY >&2 $n "Writing PDF output ..$c"
507 PDFWRITE="$GS -dQUIET -dBATCH -dNOPAUSE -sDEVICE=pdfwrite"
509 # (This 'sed' script is a hack, to eliminate redundant blank pages).
511 $SED '
512 :again
513 /%%EndPageSetup/b finish
514 /%%Page:/{
516 b again
519 :finish
521 /^%%Page:.*0 *Cg *EP/d
522 ' $TC_DATA $BD_DATA | $PDFWRITE -sOutputFile=${PDF_OUTPUT-"-"} $CS_DATA -
523 $SAY >&2 ". done"
525 # ------------------------------------------------------------------------------
526 # $Source: end of file