Adapt contrib/pdfmark
[s-roff.git] / contrib / pdfmark / pdfroff.sh.in
blob66e02a8e517679200c60fc7e6a4ed92a6a616de7
1 #!/bin/sh
2 #@ Format PDF Output from groff Markup
4 # Copyright (c) 2014 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 # Copyright (C) 2005, 2006, Free Software Foundation, Inc.
7 # Written by Keith Marshall (keith.d.marshall@ntlworld.com)
9 # groff is free software; you can redistribute it and/or modify it under
10 # the terms of the GNU General Public License as published by the Free
11 # Software Foundation; either version 2, or (at your option) any later
12 # version.
14 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 # for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with groff; see the file COPYING. If not, write to the Free Software
21 # Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
# Set up an identifier for the NULL device.
# In most cases "/dev/null" will be correct, but some shells on
# MS-DOS/MS-Windows systems may require us to use "NUL".

NULLDEV="/dev/null"
test -c "$NULLDEV" || NULLDEV="NUL"

# Set up the command name to use in diagnostic messages.
# (We can't assume we have 'basename', so use the full path if required.
# Also use the 'exec 2>...' workaround for a bug in Cygwin's 'ash').
# "$0" is quoted, so that an invocation path containing blanks reaches
# 'basename' as one operand, rather than as a "name suffix" pair.

CMD=`exec 2>$NULLDEV; basename "$0"` || CMD=$0
# To ensure that prerequisite helper programs are available, and are
# executable, a [fairly] portable method of detecting such programs is
# provided by function `searchpath'.

searchpath(){
  # Usage: searchpath progname path
  #
  # Print the name of the first regular, executable file called `progname'
  # (or `progname.exe') found in the directories listed in `path'; the
  # directories are separated by $PATH_SEPARATOR (":" when that is unset).
  # Print ":" when none of the directories provides the program.
  #
  # The search runs in a subshell, so that the IFS change, the `set -f'
  # and the scratch variables never leak into the caller.
  (
    IFS=${PATH_SEPARATOR-":"} prog=':'
    # directory names are data; do not let the shell glob them
    set -f
    for dir in $2
    do
      # an empty path element designates the current directory
      test -n "$dir" || dir=.
      for ext in '' '.exe'
      do
        # try `progname' with all well known extensions
        # (e.g. Win32 may require `progname.exe')
        try="$dir/$1$ext"
        test -f "$try" && test -x "$try" && prog="$try" && break
      done
      test "$prog" = ":" || break
    done
    echo "$prog"
  )
}
60 # @PATH_SEARCH_SETUP@
# Some systems provide a 'zsh' implementation as '/bin/sh'; when that is
# the case, switch it into 'sh' emulation (hack adapted from autoconf code).

if test "x${ZSH_VERSION+set}" = "xset"
then
  NULLCMD=":"
  if (emulate sh) >$NULLDEV 2>&1
  then
    emulate sh
  fi
fi
# Locate the helper programs and the formatter with 'searchpath'.
# Each variable ends up holding either a program path or ":", which the
# checks that follow treat as "not found".
# NOTE(review): every line of this extract starts with the line number it
# had in the original file, and some numbers are skipped (e.g. 94-95 after
# the last ROFF= assignment); the 'fi' closing the if/elif/else chain is
# presumably among the skipped lines -- TODO confirm against the pristine file.
# NOTE(review): the '@U_ROFF@' / '@L_ROFF@' tokens look like build-time
# substitution placeholders -- TODO confirm.
68 # We need both 'grep' and 'sed' programs, to parse script options,
69 # and we also need 'cat', to display help and some error messages,
70 # so ensure they are all installed, before we continue.
72 CAT=`searchpath cat "$PATH"`
73 GREP=`searchpath grep "$PATH"`
74 SED=`searchpath sed "$PATH"`
76 # Another fundamental requirement is the 'groff' program itself;
77 # we MUST use a 'groff' program located in 'GROFF_BIN_DIR', if this
78 # is specified; if not, we will search 'GROFF_BIN_PATH', only falling
79 # back to a 'PATH' search, if neither of these is specified.
# GPATH records the NAME of the variable that was searched; it is only
# used in the diagnostic printed when the formatter cannot be found.
81 if test -n "$@U_ROFF@_BIN_DIR"
82 then
83 GPATH=@U_ROFF@_BIN_DIR
84 ROFF=`searchpath @L_ROFF@ "$@U_ROFF@_BIN_DIR"`
86 elif test -n "$@U_ROFF@_BIN_PATH"
87 then
88 GPATH=@U_ROFF@_BIN_PATH
89 ROFF=`searchpath @L_ROFF@ "$@U_ROFF@_BIN_PATH"`
91 else
92 GPATH=PATH
93 ROFF=`searchpath @L_ROFF@ "$PATH"`
# Report every helper whose lookup result is ":" and abort with status 1
# when at least one is missing.
# NOTE(review): original line numbers are skipped in this extract (109,
# 116, 118, 121-122, 124-125); the 'do', the 'ETX' here-document
# terminator and the closing 'fi' are presumably among the skipped lines
# -- TODO confirm against the pristine file.
96 # If one or more of these is missing, diagnose and bail out.
# NO collects the quoted names of the missing programs; one line per
# missing program is written to stderr as it is detected.
98 NO='' NOPROG="$CMD: installation problem: cannot find program"
99 test "$CAT" = ":" && echo >&2 "$NOPROG 'cat' in PATH" && NO="$NO 'cat'"
100 test "$GREP" = ":" && echo >&2 "$NOPROG 'grep' in PATH" && NO="$NO 'grep'"
101 test "$ROFF" = ":" && echo >&2 "$NOPROG '@L_ROFF@' in $GPATH" &&
102 NO="$NO '@L_ROFF@'"
103 test "$SED" = ":" && echo >&2 "$NOPROG 'sed' in PATH" && NO="$NO 'sed'"
104 if test -n "$NO"
105 then
# The names become the positional parameters; NO is then rebuilt as the
# plural suffix followed by an "a, b and c" style list, and IS is set to
# the matching verb form.
106 set $NO
107 test $# -gt 1 && NO="s" IS="are" || NO='' IS="is"
108 while test $# -gt 0
110 test $# -gt 2 && NO="$NO $1,"
111 test $# -eq 2 && NO="$NO $1 and" && shift
112 test $# -lt 2 && NO="$NO $1"
113 shift
114 done
# NOTE(review): when 'cat' itself is the missing program, $CAT is ":" and
# the summary below is not displayed; only the per-program lines above are.
115 $CAT >&2 <<-ETX
117 *** FATAL INSTALLATION ERROR ***
119 The program$NO $IS required by '$CMD',
120 but cannot be found; '$CMD' is unable to continue.
123 exit 1
# When a postprocessor command is supplied, its first word is stored as
# the interpreter to use: 'set command WORDS...' makes that first word $2,
# which the 'echo' on the continuation line prints.
# NOTE(review): the 'fi' closing this 'if' is not visible in this extract
# (original line numbers 134-135 are skipped) -- TODO confirm against the
# pristine file.
# NOTE(review): the '@U_PDFROFF@' / '@U_ROFF@' tokens look like build-time
# substitution placeholders -- TODO confirm.
126 # Identify the postprocessor command, for writing PDF output.
127 # (May be forced, by defining PDFROFF_POSTPROCESSOR_COMMAND in the environment;
128 # if this is not set, leave blank to use the built in default).
130 if test -n "${@U_PDFROFF@_POSTPROCESSOR_COMMAND}"
131 then
132 @U_ROFF@_GHOSTSCRIPT_INTERPRETER=`set command ${@U_PDFROFF@_POSTPROCESSOR_COMMAND};
133 echo $2`
# Set up temporary/intermediate file locations. FIXME add shell lib etc.
#
# Use $TMPDIR when it names an existing directory; otherwise fall back to
# /tmp, and to the current directory when /tmp does not exist either.
# (Spelled out as an explicit 'if', so that the outcome cannot depend on a
# scratch variable inherited from the environment, and 'test' is never
# handed an empty operand.)

if test -z "${TMPDIR}" || test ! -d "${TMPDIR}"
then
  TMPDIR=/tmp
  test -d "${TMPDIR}" || TMPDIR=.
fi

WRKFILE=${TMPDIR}/pdf$$.tmp

REFCOPY=${TMPDIR}/pdf$$.cmp
REFFILE=${TMPDIR}/pdf$$.ref

CS_DATA=""
TC_DATA=${TMPDIR}/pdf$$.tc
BD_DATA=${TMPDIR}/pdf$$.ps

# Set a trap, to delete temporary files on exit.
# (FIXME: may want to include other signals, in released version).
# The directory part is quoted, so that a TMPDIR containing blanks still
# has its files removed; the file name pattern stays unquoted, because it
# has to be expanded when the trap runs.

trap 'rm -f "${TMPDIR}"/pdf$$.*' 0
# Initialise 'groff' format control settings,
# to discriminate table of contents and document body formatting passes.

TOC_FORMAT="-rPHASE=1"
BODY_FORMAT="-rPHASE=2"

# Names of the long options which are processed locally, given without
# their leading '--'.  The list is expanded unquoted by the option parser,
# so the line layout inside the string is free.

LONGOPTS="
  help reference-dictionary no-reference-dictionary
  stylesheet pdf-output no-pdf-output
  version report-progress no-toc-relocation
  emit-ps keep-temporary-files no-kill-null-pages
  "
# Command line parser: long options are handled here, everything else is
# appended either to ROFF_STYLE (formatter options) or to INPUT_FILES.
# Both lists are later run through 'eval', which is why values are stored
# with embedded \"...\" quoting.
# NOTE(review): every line of this extract starts with the line number it
# had in the original file, and many numbers are skipped; the 'do' lines,
# the ';;' terminators of the case arms, the 'ETX' here-document
# terminator and the '*)' pattern of the "unknown option" arm are
# presumably among the skipped lines -- TODO confirm against the pristine
# file before editing.
167 # Parse the command line, to identify 'pdfroff' specific options.
168 # Collect all other parameters into new argument and file lists,
169 # to be passed on to 'groff', enforcing the '-Tps' option.
171 DIFF="" STREAM="" INPUT_FILES=""
172 SHOW_VERSION="" ROFF_STYLE="$ROFF -Tps"
173 while test $# -gt 0
175 case "$1" in
177 # Long options must be processed locally ...
179 --*)
181 # First identify, matching any abbreviation to its full form.
# OPTNAME is the part of $1 before the first '=' ($1 is split with IFS
# set to '='); MATCH accumulates every '--name' from LONGOPTS that 'grep'
# finds to begin with OPTNAME.
183 MATCH="" OPTNAME=`IFS==; set dummy $1; echo $2`
184 for OPT in $LONGOPTS
186 MATCH="$MATCH"`echo --$OPT | $GREP "^$OPTNAME"`
187 done
189 # For options in the form --option=value
190 # capture any specified value into $OPTARG.
192 OPTARG=`echo $1 | $SED -n s?"^${OPTNAME}="??p`
194 # Perform case specific processing for matched option ...
196 case "$MATCH" in
198 --help)
199 $CAT <<-ETX
200 Usage: $CMD [-option ...] [--long-option ...] [file ...]
202 Options:
204 --help
205 Display this usage summary, and exit.
208 --version
209 Display a version identification message and exit.
211 --report-progress
212 Enable console messages, indicating the progress of the
213 PDF document formatting process.
215 --emit-ps
216 Emit PostScript output instead of PDF; this may be useful
217 when the ultimate PDF output is to be generated by a more
218 specialised postprocessor, (e.g. gpresent), rather than
219 the default GhostScript PDF writer.
221 --pdf-output=name
222 Write the PDF, (or PostScript), output stream to file
223 'name'; if this option is unspecified, standard output
224 is used for PDF, (or PostScript), output.
226 --no-pdf-output
227 Suppress the generation of PDF, (or PostScript), output
228 entirely; use this with the --reference-dictionary option,
229 if processing a document stream to produce only a
230 reference dictionary.
232 --no-reference-dictionary
233 Suppress the generation of a '$CMD' reference dictionary
234 for the PDF document. Normally '$CMD' will create a
235 reference dictionary, at the start of document processing;
236 this option can accelerate processing, if it is known in
237 advance, that no reference dictionary is required.
239 --reference-dictionary=name
240 Save the document reference dictionary in file 'name'.
241 If 'name' already exists, when processing commences, it
242 will be used as the base case, from which the updated
243 dictionary will be derived. If this option is not used,
244 then the reference dictionary, created during the normal
245 execution of '$CMD', will be deleted on completion of
246 document processing.
248 --stylesheet=name
249 Use the file 'name' as a '@L_ROFF@' style sheet, to control
250 the appearance of the document's front cover section. If
251 this option is not specified, then no special formatting
252 is applied, to create a front cover section.
254 --no-toc-relocation
255 Suppress the multiple pass '@L_ROFF@' processing, which is
256 normally required to position the table of contents at the
257 start of a PDF document.
259 --no-kill-null-pages
260 Suppress the 'null page' elimination filter, which is used
261 to remove the excess blank pages produced by the collation
262 algorithm used for 'toc-relocation'.
264 --keep-temporary-files
265 Suppress the normal clean up of temporary files, which is
266 scheduled when '@L_PDFROFF@' completes.
269 exit 0
# The option is also handed on to the formatter command line, so that the
# version run which follows the parser reports both versions.
272 --version)
273 ROFF_STYLE="$ROFF_STYLE \"$1\""
274 SHOW_VERSION="@L_PDFROFF@ v@VERSION@"
277 --report-progress)
278 SHOW_PROGRESS=echo
# Resetting the exit trap is what keeps the intermediate files.
281 --keep-temporary-files)
282 trap "" 0
285 --emit-ps)
286 @U_PDFROFF@_POSTPROCESSOR_COMMAND="$CAT"
289 --pdf-output)
290 PDF_OUTPUT="$OPTARG"
293 --no-pdf-output)
294 PDF_OUTPUT="$NULLDEV"
297 --reference-dictionary)
298 REFFILE="$OPTARG"
# A non-empty DIFF makes the later 'test -z "$DIFF"' guard skip the
# reference resolution set up; ':' also turns the awk/diff calls into no-ops.
301 --no-reference-dictionary)
302 AWK=":" DIFF=":" REFFILE="$NULLDEV" REFCOPY="$NULLDEV"
305 --stylesheet)
306 STYLESHEET="$OPTARG" CS_DATA=${TMPDIR}/pdf$$.cs
309 --no-toc-relocation)
310 TC_DATA="" TOC_FORMAT="" BODY_FORMAT=""
313 --no-kill-null-pages)
314 @U_PDFROFF@_COLLATE="$CAT" @U_PDFROFF@_KILL_NULL_PAGES=""
317 # any other non-null match must have matched more than one defined case,
318 # so report the ambiguity, and bail out.
320 --*)
321 echo >&2 "$CMD: ambiguous abbreviation in option '$1'"
322 exit 1
325 # while no match at all simply represents an undefined case.
328 echo >&2 "$CMD: unknown option '$1'"
329 exit 1
331 esac
334 # A solitary hyphen, as an argument, means "stream STDIN through groff",
335 # while the "-i" option means "append STDIN stream to specified input files",
336 # so set up a mechanism to achieve this, for ALL 'groff' passes.
338 - | -i*)
339 STREAM="$CAT ${TMPDIR}/pdf$$.in |"
340 test "$1" = "-" && INPUT_FILES="$INPUT_FILES $1" \
341 || ROFF_STYLE="$ROFF_STYLE $1"
344 # Those standard options which expect an argument, but are specified with
345 # an intervening space, between flag and argument, must be reparsed, so we
346 # can trap invalid use of '-T dev', or missing input files.
# The flag and the word following it are glued into one argument and put
# back behind a dummy 'reparse' word, which the 'shift' ending the loop
# body then discards.
348 -[dfFILmMnoPrTwW])
349 OPTNAME="$1"
350 shift; set reparse "$OPTNAME$@"
353 # Among standard options, '-Tdev' is treated as a special case.
354 # '-Tps' is automatically enforced, so if specified, is silently ignored.
356 -Tps) ;;
358 # No other '-Tdev' option is permitted.
360 -T*) echo >&2 "$CMD: option '$1' is incompatible with PDF output"
361 exit 1
364 # '-h' and '-v' options redirect to their equivalent long forms ...
# ('-h' replaces the whole argument list, so any remaining arguments are
# dropped; '-v' keeps them.)
366 -h*) set redirect --help
369 -v*) shift; set redirect --version "$@"
372 # All other standard options are simply passed through to 'groff',
373 # with no validation beforehand.
375 -*) ROFF_STYLE="$ROFF_STYLE \"$1\""
378 # All non-option arguments are considered as possible input file names,
379 # and are passed on to 'groff', unaltered.
381 *) INPUT_FILES="$INPUT_FILES \"$1\""
383 esac
384 shift
385 done
# Post-parse set up: the version shortcut, the probe for a newline-free
# 'echo', and the capture of standard input for replay.
# NOTE(review): the 'fi' closing the version check is not visible in this
# extract (original line numbers 396-397 are skipped) -- TODO confirm
# against the pristine file.
387 # If the '-v' or '--version' option was specified,
388 # then we simply emulate the behaviour of 'groff', with this option,
389 # and quit.
# 'eval' is required here, because ROFF_STYLE and INPUT_FILES carry
# embedded \"...\" quoting added by the option parser.
391 if test -n "$SHOW_VERSION"
392 then
393 echo >&2 "$SHOW_VERSION"
394 echo >&2; eval $ROFF_STYLE $INPUT_FILES
395 exit $?
398 # Establish how to invoke 'echo', suppressing the terminating newline.
399 # (Adapted from 'autoconf' code, as found in 'configure' scripts).
# The result is the pair $n / $c, used later as: $SAY >&2 $n text$c
# At most one of the two is non-empty.
401 case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
402 *c*,*-n*) n='' c='' ;;
403 *c*) n='-n' c='' ;;
404 *) n='' c='\c' ;;
405 esac
407 # If STDIN is specified among the input files,
408 # or if no input files are specified, then we need to capture STDIN,
409 # so we can replay it into each 'groff' processing pass.
# STREAM is a command prefix ending in '|'; it is placed in front of the
# formatter command in the 'eval' lines of the later passes.
411 test -z "$INPUT_FILES" && STREAM="$CAT ${TMPDIR}/pdf$$.in |"
412 test -n "$STREAM" && $CAT > ${TMPDIR}/pdf$$.in
# Prepare reference resolution: create the dictionary file, seed the
# comparison copy, and locate 'awk' and 'diff'.  When either program is
# missing, a warning is shown and AWK, DIFF and SAY are all set to ':',
# which turns the resolver commands that follow into no-ops.
# NOTE(review): original line numbers are skipped in this extract (424,
# 435, 439, 449, 451, 454, 458-462); the 'do', the 'ETX' here-document
# terminator and the three closing 'fi' lines are presumably among the
# skipped lines -- TODO confirm against the pristine file.
# NOTE(review): the '@U_ROFF@' / '@AWK_INTERPRETERS@' tokens look like
# build-time substitution placeholders -- TODO confirm.
414 # Unless reference resolution is explicitly suppressed,
415 # we initiate it by touching the cross reference dictionary file,
416 # and initialise the comparator, to kickstart the reference resolver loop.
# SAY is the progress reporting command: ':' (silent) unless
# SHOW_PROGRESS has been set by --report-progress.
418 SAY=":"
# DIFF is empty here unless --no-reference-dictionary set it to ':'.
419 if test -z "$DIFF"
420 then
421 >> $REFFILE
422 echo kickstart > $REFCOPY
423 test x${SHOW_PROGRESS+"set"} = x"set" && SAY=echo
425 # In order to correctly resolve 'pdfmark' references,
426 # we need to have both the 'awk' and 'diff' programs available.
428 NO=''
429 if test -n "$@U_ROFF@_AWK_INTERPRETER"
430 then
431 AWK="$@U_ROFF@_AWK_INTERPRETER"
432 test -f "$AWK" && test -x "$AWK" || AWK=":"
433 else
434 for prog in @AWK_INTERPRETERS@
436 AWK=`searchpath $prog "$PATH"`
437 test "$AWK" = ":" || break
438 done
440 DIFF=`searchpath diff "$PATH"`
441 test "$AWK" = ":" && echo >&2 "$NOPROG 'awk' in PATH" && NO="$NO 'awk'"
442 test "$DIFF" = ":" && echo >&2 "$NOPROG 'diff' in PATH" && NO="$NO 'diff'"
443 if test -n "$NO"
444 then
# At most two names can be missing here, so the message text is built
# directly from $1 and $2.
445 set $NO
446 SAY=":" AWK=":" DIFF=":"
447 test $# -gt 1 && NO="s $1 and $2 are" || NO=" $1 is"
448 $CAT >&2 <<-ETX
450 *** WARNING ***
452 The program$NO required, but cannot be found;
453 consequently, '$CMD' is unable to resolve 'pdfmark' references.
455 Document processing will continue, but no 'pdfmark' reference dictionary
456 will be compiled; if any 'pdfmark' reference appears in the resulting PDF
457 document, the formatting may not be correct.
# Reference resolver: repeat a formatter pass until the dictionary
# produced by the pass ($REFFILE) is identical to the one fed into it
# ($REFCOPY).  When DIFF is ':' the 'until' condition succeeds at once,
# so the loop body is never executed.
# NOTE(review): original line numbers are skipped in this extract (467-468,
# 477, 485-486, 495-496); the 'do', the 'ETX' here-document terminator
# and the 'fi' closing the pass limit check are presumably among the
# skipped lines -- TODO confirm against the pristine file.
463 # Run the multi-pass 'pdfmark' reference resolver loop ...
465 $SAY >&2 $n Resolving references ..$c
466 until $DIFF $REFCOPY $REFFILE 1>$NULLDEV 2>&1
469 # until all references are resolved, to yield consistent values
470 # in each of two consecutive passes, or until it seems that no consistent
471 # resolution is achievable.
# PASS_INDICATOR grows by one '.' for each pass started; the fourth
# entry into the loop body triggers the failure branch.
473 $SAY >&2 $n .$c
474 PASS_INDICATOR="${PASS_INDICATOR}."
475 if test "$PASS_INDICATOR" = "...."
476 then
478 # More than three passes required indicates a probable inconsistency
479 # in the source document; diagnose, and bail out.
481 $SAY >&2 " failed"
482 $CAT >&2 <<-ETX
483 $CMD: unable to resolve references consistently after three passes
484 $CMD: the source document may exhibit instability about the reference(s) ...
487 # Report the unresolved references, as a diff between the two pass files,
488 # preferring 'unified' or 'context' diffs, when available
# Each format is probed by diffing the null device against itself; '-u0'
# is probed last, so it wins when both formats are supported.
490 DIFFOPT=''
491 $DIFF -c0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-c0'
492 $DIFF -u0 $NULLDEV $NULLDEV 1>$NULLDEV 2>&1 && DIFFOPT='-u0'
493 $DIFF >&2 $DIFFOPT $REFCOPY $REFFILE
494 exit 1
497 # Replace the comparison file copy from any previous pass,
498 # with the most recently updated copy of the reference dictionary.
499 # (Some versions of 'mv' may not support overwriting of an existing file,
500 # so remove the old comparison file first).
502 rm -f $REFCOPY
503 mv $REFFILE $REFCOPY
505 # Run 'groff' and 'awk', to identify reference marks in the document source,
506 # filtering them into the reference dictionary; discard incomplete 'groff' output
507 # at this stage.
# The formatter's stdout is discarded; its stderr is captured in $WRKFILE,
# which 'awk' then filters to build the new $REFFILE.
509 eval $STREAM $ROFF_STYLE -Z 1>$NULLDEV 2>$WRKFILE $REFCOPY $INPUT_FILES
510 $AWK '/^pdfmark-info:href/ {$1 = ".pdfhref D -N"; print}' $WRKFILE > $REFFILE
511 done
512 $SAY >&2 " done"
# Stop here when PDF output is suppressed; otherwise locate a GhostScript
# interpreter and abort with status 1 when none is available.
# NOTE(review): original line numbers are skipped in this extract (532,
# 536-537, 544, 546, 549-550, 552-553); the 'do', the 'ETX' here-document
# terminator and the closing 'fi' lines are presumably among the skipped
# lines -- TODO confirm against the pristine file.
# NOTE(review): the '@U_ROFF@' / '@GHOSTSCRIPT_INTERPRETERS@' tokens look
# like build-time substitution placeholders -- TODO confirm.
# NOTE(review): the fatal message below says "connot"; it is a runtime
# string, so it is left untouched here.
514 # To get to here ...
515 # We MUST have resolved all 'pdfmark' references, such that the content of the
516 # updated reference dictionary file EXACTLY matches the last saved copy.
518 # If PDF output has been suppressed, then there is nothing more to do.
# (--no-pdf-output is what sets PDF_OUTPUT to the null device.)
520 test "$PDF_OUTPUT" = "$NULLDEV" && exit 0
522 # We are now ready to start preparing the intermediate PostScript files,
523 # from which the PDF output will be compiled -- but before proceeding further ...
524 # let's make sure we have a GhostScript interpreter to convert them!
# An explicitly configured interpreter must be an executable regular
# file; otherwise each candidate name is looked up in PATH, in order.
526 if test -n "$@U_ROFF@_GHOSTSCRIPT_INTERPRETER"
527 then
528 GS="$@U_ROFF@_GHOSTSCRIPT_INTERPRETER"
529 test -f "$GS" && test -x "$GS" || GS=":"
530 else
531 for prog in @GHOSTSCRIPT_INTERPRETERS@
533 GS=`searchpath $prog "$PATH"`
534 test "$GS" = ":" || break
535 done
538 # If we could not find a GhostScript interpreter, then we can do no more.
540 if test "$GS" = ":"
541 then
542 echo >&2 "$CMD: installation problem: cannot find GhostScript interpreter"
543 $CAT >&2 <<-ETX
545 *** FATAL INSTALLATION ERROR ***
547 '$CMD' requires a GhostScript interpreter to convert PostScript to PDF.
548 Since you do not appear to have one installed, '$CMD' connot continue.
551 exit 1
# Formatting passes: an optional cover section, an optional table of
# contents, and the document body, each written to its own intermediate
# file ($CS_DATA, $TC_DATA, $BD_DATA).
# NOTE(review): the 'fi' lines closing the two 'if' statements are not
# visible in this extract (original line numbers 576-577 and 586-587 are
# skipped) -- TODO confirm against the pristine file.
# NOTE(review): '@L_D_HTML@' looks like a build-time substitution
# placeholder -- TODO confirm.
554 # We now extend the local copy of the reference dictionary file,
555 # to create a full 'pdfmark' reference map for the document ...
557 $AWK '/^@L_D_HTML@-info/ {print ".pdfhref Z", $2, $3, $4}' $WRKFILE >> $REFCOPY
559 # Re-enable progress reporting, if necessary ...
560 # (Missing 'awk' or 'diff' may have disabled it, to avoid display
561 # of spurious messages associated with reference resolution).
563 test x${SHOW_PROGRESS+"set"} = x"set" && SAY=echo
565 # If a document cover style sheet is specified ...
566 # then we run a special formatting pass, to create a cover section file.
# The 'sed' filter passes on only the input lines between the CS_MACRO
# and CE_MACRO requests (defaults "CS" and "CE"); those lines are
# formatted together with the style sheet.
568 if test -n "$STYLESHEET"
569 then
570 DOT='^\.[ ]*'
571 CS_MACRO=${CS_MACRO-"CS"} CE_MACRO=${CE_MACRO-"CE"}
572 $SAY >&2 $n "Formatting document ... front cover section ..$c"
573 CS_FILTER="$STREAM $SED -n '/${DOT}${CS_MACRO}/,/${DOT}${CE_MACRO}/p'"
574 eval $CS_FILTER $INPUT_FILES | eval $ROFF_STYLE $STYLESHEET - > $CS_DATA
575 $SAY >&2 ". done"
578 # If table of contents relocation is to be performed (it is, by default),
579 # then we run an extra 'groff' pass, to format a TOC intermediate file.
# (--no-toc-relocation empties TC_DATA, which skips this pass.)
581 if test -n "$TC_DATA"
582 then
583 $SAY >&2 $n "Formatting document ... table of contents ..$c"
584 eval $STREAM $ROFF_STYLE $TOC_FORMAT $REFCOPY $INPUT_FILES > $TC_DATA
585 $SAY >&2 ". done"
588 # In all cases, a final 'groff' pass is required, to format the document body.
590 $SAY >&2 $n "Formatting document ... body section ..$c"
591 eval $STREAM $ROFF_STYLE $BODY_FORMAT $REFCOPY $INPUT_FILES > $BD_DATA
592 $SAY >&2 ". done"
# Final stage: the table of contents and body files are piped through the
# collation filter into the postprocessor, which also receives the cover
# section file ($CS_DATA) and '-' as its operands.
# When no postprocessor command is set, GhostScript is used with the
# pdfwrite device, writing to $PDF_OUTPUT, or to '-' when that is unset.
# With a postprocessor command set, a non-empty PDF_OUTPUT becomes the
# script's standard output instead.
# The collation command and its script use '-' default expansions, so the
# values assigned by --no-kill-null-pages (the 'cat' program and an empty
# script) replace the 'sed' filter with a plain concatenation.
# NOTE(review): original line numbers are skipped inside the 'sed' script
# (613, 615-616, 620, 622-623, 625), so the script shown here is
# incomplete; the 'fi' closing the 'if' is not visible either (603,
# 607-608) -- TODO confirm against the pristine file before editing.
# NOTE(review): the '@U_PDFROFF@' tokens look like build-time substitution
# placeholders -- TODO confirm.
594 # Finally ...
595 # Invoke GhostScript as a PDF writer, to bind all of the generated
596 # PostScript intermediate files into a single PDF output file.
598 $SAY >&2 $n "Writing PDF output ..$c"
599 if test -z "$@U_PDFROFF@_POSTPROCESSOR_COMMAND"
600 then
601 @U_PDFROFF@_POSTPROCESSOR_COMMAND="$GS -dQUIET -dBATCH -dNOPAUSE
602 -sDEVICE=pdfwrite -sOutputFile="${PDF_OUTPUT-"-"}
604 elif test -n "$PDF_OUTPUT"
605 then
606 exec > $PDF_OUTPUT
609 # (This 'sed' script is a hack, to eliminate redundant blank pages).
611 ${@U_PDFROFF@_COLLATE-"$SED"} ${@U_PDFROFF@_KILL_NULL_PAGES-'
612 /%%Page:/{
614 /%%BeginPageSetup/b again
617 :again
618 /%%EndPageSetup/b finish
619 /%%Page:/{
621 b again
624 :finish
626 /^%%Page:.*\n0 Cg EP$/d
627 '} $TC_DATA $BD_DATA | $@U_PDFROFF@_POSTPROCESSOR_COMMAND $CS_DATA -
628 $SAY >&2 ". done"
630 # s-sh-mode