Handle parsing anomalies in Cygwin's `ash', and similar, shells.
[s-roff.git] / contrib / pdfmark / pdfroff.sh
blob abb1892691da5795ee2949e79cab68dd6b818dfb
1 #!/bin/sh
2 # ------------------------------------------------------------------------------
4 # Function: Format PDF Output from groff Markup
6 # Copyright (C) 2005, Free Software Foundation, Inc.
7 # Written by Keith Marshall (keith.d.marshall@ntlworld.com)
8 #
9 # This file is part of groff.
11 # groff is free software; you can redistribute it and/or modify it under
12 # the terms of the GNU General Public License as published by the Free
13 # Software Foundation; either version 2, or (at your option) any later
14 # version.
16 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
17 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 # for more details.
21 # You should have received a copy of the GNU General Public License along
22 # with groff; see the file COPYING. If not, write to the Free Software
23 # Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 # ------------------------------------------------------------------------------
27 # Set up an identifier for the NULL device.
28 # In most cases "/dev/null" will be correct, but some shells on
29 # MS-DOS/MS-Windows systems may require us to use "NUL".
# (The fallback has to be stored in NULLDEV itself, since that is the
# name used by every redirection which follows.)
31 NULLDEV="/dev/null"
32 test -c "$NULLDEV" || NULLDEV="NUL"
34 # Set up the command name to use in diagnostic messages.
35 # (We can't assume we have 'basename', so use the full path if required.
36 # Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
# ($0 is quoted, so that a script path containing white space reaches
# 'basename' as a single argument.)
38 CMD=`exec 2>$NULLDEV; basename "$0"` || CMD=$0
40 # We need both 'grep' and 'sed' programs, to parse script options,
41 # and we also need 'cat', to display help and some error messages,
42 # so ensure they are all installed, before we continue.
43 # (Again, note that we first check the status from 'type', BEFORE
44 # we attempt to use the result, because Cygwin's 'ash' uses 'stdout'
45 # instead of 'stderr', to display its 'not found' message).
#
# Each variable starts out as ':', which is the value the subsequent
# installation check compares against, to recognise a missing program.
# When 'type' succeeds, its output is split into positional parameters
# by 'set' (with a ':' glued on to the front of the first word), and
# "eval echo '$'$#" then prints the last of those parameters, i.e. the
# final word of the 'type' output.
# NOTE(review): presumably that final word is the program's path name;
# this would not hold for a path containing white space -- confirm.
47 CAT=':' GREP=':' SED=':'
48 type cat >$NULLDEV 2>&1 && CAT=`set :\`type cat\` ; eval echo '$'$#`
49 type grep >$NULLDEV 2>&1 && GREP=`set :\`type grep\` ; eval echo '$'$#`
50 type sed >$NULLDEV 2>&1 && SED=`set :\`type sed\` ; eval echo '$'$#`
52 # Another fundamental requirement is the 'groff' program itself;
53 # we will first perform a PATH search to locate this; however,
54 # we will prefer any version existing in a specified GROFF_BIN_DIR,
55 # or, if unspecified, the installed location of 'groff' programs;
56 # (this will override the result of the initial PATH search).
#
# GROFF keeps the value ':' when neither lookup succeeds.
# The "${GROFF_BIN_DIR=...}" expansion also assigns the quoted default
# to GROFF_BIN_DIR, when that variable is not already set, so the plain
# "$GROFF_BIN_DIR" reference in the continuation line is always defined.
# NOTE(review): "@GROFF_BIN_DIR@" looks like a placeholder which is
# substituted when the script is installed -- confirm.
58 GROFF=':'
59 type groff >$NULLDEV 2>&1 && GROFF=`set :\`type groff\` ; eval echo '$'$#`
60 type ${GROFF_BIN_DIR="@GROFF_BIN_DIR@"}/groff >$NULLDEV 2>&1 \
61 && GROFF=`set :\`type $GROFF_BIN_DIR/groff\` ; eval echo '$'$#`
63 # If one or more of these is missing, diagnose and bail out.
# (A program is missing when its variable still holds the initial ':'.
# One diagnostic line is written to stderr for each missing program,
# and its quoted name is appended to NO, for use in the summary which
# follows.  The 'groff' diagnostic names GROFF_BIN_DIR, which is the
# directory examined by the preferred 'groff' lookup.)
65 NO=''
66 NOPROG="$CMD: installation problem: cannot find program"
67 test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
68 test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
69 test "$GROFF" = ":" && echo >&2 "$NOPROG 'groff' in $GROFF_BIN_DIR" && NO="$NO 'groff'"
70 test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
71 if test -n "$NO"
72 then
73 set $NO
74 test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
75 while test $# -gt 0
77 test $# -gt 2 && NO="$NO $1,"
78 test $# -eq 2 && NO="$NO $1 and" && shift
79 test $# -lt 2 && NO="$NO $1"
80 shift
81 done
82 $CAT >&2 <<-ETX
84 *** FATAL INSTALLATION ERROR ***
86 The program$NO $IS required by '$CMD',
87 but cannot be found; '$CMD' is unable to continue.
89 ETX
90 exit 1
93 # Set up temporary/intermediate file locations.
# (GROFF_TMPDIR, if not already set, is assigned the first of TMPDIR,
# TMP and TEMP which is set, or "." when none of them is.  Every file
# name is built from the common stem "pdf$$", where $$ is the process
# ID of this shell, and differs only in its suffix.)
95 WRKFILE=${GROFF_TMPDIR=${TMPDIR-${TMP-${TEMP-"."}}}}/pdf$$.tmp
97 REFCOPY=${GROFF_TMPDIR}/pdf$$.cmp
98 REFFILE=${GROFF_TMPDIR}/pdf$$.ref
# (CS_DATA stays empty unless the '--stylesheet' option assigns it.)
100 CS_DATA=""
101 TC_DATA=${GROFF_TMPDIR}/pdf$$.tc
102 BD_DATA=${GROFF_TMPDIR}/pdf$$.ps
104 # Set a trap, to delete temporary files on exit.
105 # (FIXME: may want to include other signals, in released version).
# (The command is double quoted, so the directory and the process ID are
# expanded now, when the trap is set; the trailing '*' is left for the
# shell to expand when the trap runs, so it removes every file which
# shares the "pdf$$." stem.)
107 trap "rm -f ${GROFF_TMPDIR}/pdf$$.*" 0
109 # Initialise 'groff' format control settings,
110 # to discriminate table of contents and document body formatting passes.
# (Both are cleared by the '--no-toc-relocation' option.)
112 TOC_FORMAT="-rPHASE=1"
113 BODY_FORMAT="-rPHASE=2"
115 LONGOPTS="
116 help reference-dictionary no-reference-dictionary
117 stylesheet pdf-output no-pdf-output
118 version report-progress no-toc-relocation
120 # Parse the command line, to identify 'pdfroff' specific options.
121 # Collect all other parameters into new argument and file lists,
122 # to be passed on to 'groff', enforcing the '-Tps' option.
124 DIFF="" STREAM="" INPUT_FILES=""
125 SHOW_VERSION="" ARGLIST="-Tps" GROFF_STYLE="$GROFF -Tps"
126 while test $# -gt 0
128 case "$1" in
130 # Long options must be processed locally ...
132 --*)
134 # First identify, matching any abbreviation to its full form.
136 MATCH="" OPTNAME=`IFS==; set dummy $1; echo $2`
137 for OPT in $LONGOPTS
139 MATCH="$MATCH`echo --$OPT | $GREP "^$OPTNAME"`"
140 done
142 # For options in the form --option=value
143 # capture any specified value into $OPTARG.
145 OPTARG=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
147 # Perform case specific processing for matched option ...
149 case "$MATCH" in
151 --help)
152 $CAT >&2 <<-ETX
153 Usage: $CMD [-option ...] [--long-option ...] [file ...]
155 Options:
157 --help
158 Display this usage summary, and exit.
161 --version
162 Display a version identification message and exit.
164 --report-progress
165 Enable console messages, indicating the progress of the
166 PDF document formatting process.
168 --pdf-output=name
169 Write the PDF output stream to file 'name'; if this option
170 is unspecified, standard output is used for PDF output.
172 --no-pdf-output
173 Suppress the generation of PDF output entirely; use this
174 with the --reference-dictionary option, if processing a
175 document stream to produce only a reference dictionary.
177 --no-reference-dictionary
178 Suppress the generation of a '$CMD' reference dictionary
179 for the PDF document. Normally '$CMD' will create a
180 reference dictionary, at the start of document processing;
181 this option can accelerate processing, if it is known in
182 advance, that no reference dictionary is required.
184 --reference-dictionary=name
185 Save the document reference dictionary in file 'name'.
186 If 'name' already exists, when processing commences, it
187 will be used as the base case, from which the updated
188 dictionary will be derived. If this option is not used,
189 then the reference dictionary, created during the normal
190 execution of '$CMD', will be deleted on completion of
191 document processing.
193 --stylesheet=name
194 Use the file 'name' as a 'groff' style sheet, to control
195 the appearance of the document's front cover section. If
196 this option is not specified, then no special formatting
197 is applied, to create a front cover section.
199 --no-toc-relocation
200 Suppress the multiple pass 'groff' processing, which is
201 normally required to position the table of contents at the
202 start of a PDF document.
205 exit 0
208 --version)
209 ARGLIST="$ARGLIST \"$1\""
210 SHOW_VERSION="GNU pdfroff (groff) version @VERSION@"
213 --report-progress)
214 SHOW_PROGRESS=echo
217 --pdf-output)
218 PDF_OUTPUT="$OPTARG"
221 --no-pdf-output)
222 PDF_OUTPUT="$NULLDEV"
225 --reference-dictionary)
226 REFFILE="$OPTARG"
229 --no-reference-dictionary)
230 AWK=":" DIFF=":" REFFILE="$NULLDEV" REFCOPY="$NULLDEV"
233 --stylesheet)
234 STYLESHEET="$OPTARG" CS_DATA=${GROFF_TMPDIR}/pdf$$.cs
237 --no-toc-relocation)
238 TC_DATA="" TOC_FORMAT="" BODY_FORMAT=""
241 # any other non-null match must have matched more than one defined case,
242 # so report the ambiguity, and bail out.
244 --*)
245 echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
246 exit 1
249 # while no match at all simply represents an undefined case.
252 echo >&2 "$CMD: unknown option '$1'"
253 exit 1
255 esac
258 # A solitary hyphen, as an argument, means "stream STDIN through groff",
259 # while the "-i" option means "append STDIN stream to specified input files",
260 # so set up a mechanism to achieve this, for ALL 'groff' passes.
262 - | -i*)
263 STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
264 ARGLIST="$ARGLIST $1" INPUT_FILES="$INPUT_FILES $1"
267 # Those standard options which expect an argument, but are specified with
268 # an intervening space, between flag and argument, must be reparsed, so we
269 # can trap illegal use of '-T dev', or missing input files.
271 -[dfFILmMnoPrTwW])
272 OPTNAME="$1"
273 shift; set reparse "$OPTNAME$@"
276 # Among standard options, '-Tdev' is treated as a special case.
277 # '-Tps' is automatically enforced, so if specified, is silently ignored.
279 -Tps) ;;
281 # No other '-Tdev' option is permitted.
283 -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
284 exit 1
287 # '-h' and '-v' options redirect to their equivalent long forms ...
289 -h*) set redirect --help
292 -v*) shift; set redirect --version "$@"
295 # All other standard options are simply passed through to 'groff',
296 # with no validation beforehand.
298 -*) ARGLIST="$ARGLIST \"$1\"" GROFF_STYLE="$GROFF_STYLE \"$1\""
301 # All non-option arguments are considered as possible input file names,
302 # and are passed on to 'groff', unaltered.
304 *) ARGLIST="$ARGLIST \"$1\""
305 test -f "$1" && INPUT_FILES="$INPUT_FILES \"$1\""
307 esac
308 shift
309 done
311 # If the '-v' or '--version' option was specified,
312 # then we simply emulate the behaviour of 'groff', with this option,
313 # and quit.
315 if test -n "$SHOW_VERSION"
316 then
317 echo >&2 "$SHOW_VERSION"
318 echo >&2; eval $GROFF $ARGLIST
319 exit $?
322 # Establish how to invoke 'echo', suppressing the terminating newline.
323 # (Adapted from 'autoconf' code, as found in 'configure' scripts).
#
# The probe string is the output of 'echo "testing\c"', a comma, and the
# output of 'echo -n testing'; a 'c' can appear in the first part only
# if '\c' was echoed literally, and '-n' in the second part only if the
# '-n' flag was echoed literally.  The three cases are, in order:
#   neither form is understood  -> n and c both empty;
#   only the '-n' flag works    -> n='-n';
#   the '\c' escape works       -> c='\c'.
# Progress messages are then written as "echo $n text$c".
325 case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
326 *c*,*-n*) n='' c='' ;;
327 *c*) n='-n' c='' ;;
328 *) n='' c='\c' ;;
329 esac
331 # If STDIN is specified among the input files,
332 # or if no input files are specified, then we need to capture STDIN,
333 # so we can replay it into each 'groff' processing pass.
#
# STREAM holds a command prefix which ends in '|'; the later
# "eval $STREAM $GROFF ..." commands expand it into the head of a
# pipeline, which feeds the captured copy to 'groff'.  When STREAM is
# empty, nothing is captured, and 'groff' is run without the pipe.
# The capture file shares the "pdf$$." stem of the other temporary
# files, so the exit trap removes it too.
335 test -z "$INPUT_FILES" && STREAM="$CAT ${GROFF_TMPDIR}/pdf$$.in |"
336 test -n "$STREAM" && $CAT > ${GROFF_TMPDIR}/pdf$$.in
338 # Unless reference resolution is explicitly suppressed,
339 # we initiate it by touching the cross reference dictionary file,
340 # and initialise the comparator, to kickstart the reference resolver loop.
342 SAY=":"
343 if test -z "$DIFF"
344 then
345 >> $REFFILE
346 echo kickstart > $REFCOPY
347 test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
349 # In order to correctly resolve 'pdfmark' references,
350 # we need to have both the 'awk' and 'diff' programs available.
352 NO='' AWK=':'
353 eval set ${GROFF_AWK_INTERPRETER-"@GROFF_AWK_INTERPRETERS@"}
354 while test $# -gt 0
356 type $1 >$NULLDEV 2>&1 && AWK=`set :\`type $1\` ; eval echo '$'$#`
357 test "$AWK" = ":" || set "$AWK"
358 shift
359 done
360 type diff >$NULLDEV 2>&1 && DIFF=`set :\`type diff\` ; eval echo '$'$#`
361 test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO="$NO 'awk'"
362 test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO="$NO 'diff'"
363 if test -n "$NO"
364 then
365 set $NO
366 SAY=":" AWK=":" DIFF=":"
367 test $# -gt 1 && NO="s $1 and $2 are" || NO=" $1 is"
368 $CAT >&2 <<-ETX
370 *** WARNING ***
372 The program$NO required, but cannot be found;
373 consequently, '$CMD' is unable to resolve 'pdfmark' references.
375 Document processing will continue, but no 'pdfmark' reference dictionary
376 will be compiled; if any 'pdfmark' reference appears in the resulting PDF
377 document, the formatting may not be correct.
383 # Run the multi-pass 'pdfmark' reference resolver loop ...
385 $SAY >&2 $n Resolving references ..$c
386 until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
389 # until all references are resolved, to yield consistent values
390 # in each of two consecutive passes, or until it seems that no consistent
391 # resolution is achievable.
393 $SAY >&2 $n .$c
394 PASS_INDICATOR="${PASS_INDICATOR}."
395 if test "$PASS_INDICATOR" = "...."
396 then
398 # More than three passes required indicates a probable inconsistency
399 # in the source document; diagnose, and bail out.
401 $SAY >&2 " failed"
402 $CAT >&2 <<-ETX
403 $CMD: unable to resolve references consistently after three passes
404 $CMD: the source document may exhibit instability about the reference(s) ...
407 # Report the unresolved references, as a diff between the two pass files,
408 # preferring 'unified' or 'context' diffs, when available
410 DIFFOPT=''
411 $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-c0'
412 $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-u0'
413 $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
414 exit 1
417 # Replace the comparison file copy from any previous pass,
418 # with the most recently updated copy of the reference dictionary.
419 # (Some versions of 'mv' may not support overwriting of an existing file,
420 # so remove the old comparison file first).
422 rm -f $REFCOPY
423 mv $REFFILE $REFCOPY
425 # Run 'groff' and 'awk', to identify reference marks in the document source,
426 # filtering them into the reference dictionary; discard incomplete 'groff' output
427 # at this stage.
429 eval $STREAM $GROFF -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $ARGLIST
430 $AWK '/^gropdf-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
431 done
432 $SAY >&2 " done"
434 # To get to here ...
435 # We MUST have resolved all 'pdfmark' references, such that the content of the
436 # updated reference dictionary file EXACTLY matches the last saved copy.
438 # If PDF output has been suppressed, then there is nothing more to do.
# (The '--no-pdf-output' option stores "$NULLDEV" in PDF_OUTPUT; leaving
# here with status zero also fires the exit trap, which removes the
# temporary files.)
440 test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
442 # We are now ready to start preparing the intermediate PostScript files,
443 # from which the PDF output will be compiled -- but before proceeding further ...
444 # let's make sure we have a GhostScript interpreter to convert them!
446 GS=':'
447 eval set ${GROFF_GHOSTSCRIPT_INTERPRETER-"@GROFF_GHOSTSCRIPT_INTERPRETERS@"}
448 while test $# -gt 0
450 type $1 >$NULLDEV 2>&1 && GS=`set :\`type $1\` ; eval echo '$'$#`
451 test "$GS" = ":" || set "$GS"
452 shift
453 done
455 # If we could not find a GhostScript interpreter, then we can do no more.
# (GS keeps its initial ':' value when the preceding search located none
# of the candidate interpreters; a one line diagnostic and an explanatory
# notice are written to stderr, and the script exits with status 1.)
457 if test "$GS" = ":"
458 then
459 echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
460 $CAT >&2 <<-ETX
462 *** FATAL INSTALLATION ERROR ***
464 '$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
465 Since you do not appear to have one installed, '$CMD' cannot continue.
468 exit 1
471 # We now extend the local copy of the reference dictionary file,
472 # to create a full 'pdfmark' reference map for the document ...
# (WRKFILE holds the stderr output captured from the last 'groff -Z'
# pass; each of its lines which begins with "grohtml-info" is appended
# to REFCOPY, as a ".pdfhref Z" request carrying fields 2, 3 and 4.)
474 $AWK '/^grohtml-info/ {print ".pdfhref Z", $2, $3, $4}' $WRKFILE >> $REFCOPY
476 # Re-enable progress reporting, if necessary ...
477 # (Missing 'awk' or 'diff' may have disabled it, to avoid display
478 # of spurious messages associated with reference resolution).
# (The "${SHOW_PROGRESS+...}" expansion yields "set" whenever the
# variable is set, which is what '--report-progress' arranges.)
480 test "${SHOW_PROGRESS+"set"}" = "set" && SAY=echo
482 # If a document cover style sheet is specified ...
483 # then we run a special formatting pass, to create a cover section file.
485 if test -n "$STYLESHEET"
486 then
487 DOT='^\.[ ]*'
488 $SAY >&2 $n "Formatting document ... front cover section ..$c"
489 CS_FILTER="$STREAM $SED -n '/$DOT${CS_MACRO-"CS"}/,/$DOT${CE_MACRO-"CE"}/p'"
490 eval $CS_FILTER $INPUT_FILES | eval $GROFF_STYLE $STYLESHEET - > $CS_DATA
491 $SAY >&2 ". done"
494 # If table of contents relocation is to be performed (it is, by default),
495 # then we run an extra 'groff' pass, to format a TOC intermediate file.
497 if test -n "$TC_DATA"
498 then
499 $SAY >&2 $n "Formatting document ... table of contents ..$c"
500 eval $STREAM $GROFF $TOC_FORMAT $REFCOPY $ARGLIST > $TC_DATA
501 $SAY >&2 ". done"
504 # In all cases, a final 'groff' pass is required, to format the document body.
# ('eval' is needed here, because STREAM may carry a trailing pipe
# symbol, and ARGLIST carries embedded quotes around each argument,
# which must be reinterpreted by the shell.  The reference map in
# REFCOPY is passed to 'groff' ahead of the user's own arguments, and
# the formatted output is collected in BD_DATA.)
506 $SAY >&2 $n "Formatting document ... body section ..$c"
507 eval $STREAM $GROFF $BODY_FORMAT $REFCOPY $ARGLIST > $BD_DATA
508 $SAY >&2 ". done"
510 # Finally ...
511 # Invoke GhostScript as a PDF writer, to bind all of the generated
512 # PostScript intermediate files into a single PDF output file.
514 $SAY >&2 $n "Writing PDF output ..$c"
515 PDFWRITE="$GS -dQUIET -dBATCH -dNOPAUSE -sDEVICE=pdfwrite"
517 # (This 'sed' script is a hack, to eliminate redundant blank pages).
519 $SED '
520 :again
521 /%%EndPageSetup/b finish
522 /%%Page:/{
524 b again
527 :finish
529 /^%%Page:.*0 *Cg *EP/d
530 ' $TC_DATA $BD_DATA | $PDFWRITE -sOutputFile=${PDF_OUTPUT-"-"} $CS_DATA -
531 $SAY >&2 ". done"
533 # ------------------------------------------------------------------------------
534 # $Source: end of file