2 #@ mdocmx.sh - mdocmx(7) preprocessor for single-pass troff.
3 #@ mdocmx(7) extends the mdoc(7) semantic markup language by references,
4 #@ allowing mdoc(7) to create anchors and table of contents.
5 #@ Synopsis: mdocmx[.sh] [:-v:] [-t | -T Sh|sh|Ss|ss [-c]] [FILE]
6 #@ -v: increase verbosity
7 #@ -t: whether -toc lines shall be expanded to a flat .Sh TOC
8 #@ -T: whether -toc lines shall be expanded as specified: only .Sh / .Sh + .Ss
9 #@ -c: only with -t or -T: whether compact TOC display shall be generated
10 #@ Set $AWK environment to force a special awk(1) interpreter.
12 # Written 2014 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
16 : ${ENV_TMP="${TMPDIR}:${TMP}:${TEMP}"}
20 # For heaven's sake add special treatment for SunOS/Solaris
21 if [ -d /usr
/xpg
4/bin
]; then
22 PATH
=/usr
/xpg
4/bin
:$PATH
34 ( set -o noglob
) >/dev
/null
2>&1 && set -o noglob
37 [ -n "${AWK}" ] && return 0
42 # for i; do -- new in POSIX Issue 7 + TC1
45 if [ -z "${i}" ] ||
[ "${i}" = .
]; then
46 if [ -d "${PWD}" ]; then
54 [ -f "${AWK}" ] && [ -x "${AWK}" ] && return 0
62 [ -n "${msg}" ] && echo >&2 ${msg}
63 [ ${ex} -eq 0 ] && f
=1 || f
=2
64 ( echo "${0##*/}" ) >/dev/null 2>&1 && eval 'p="${0##*/}"' || p="${0}"
65 echo >&${f} "Synopsis: ${p} [-h]"
66 echo >&${f} " ${p} [:-v:] [-t | -T Sh|sh|Ss|ss [-c]] [FILE]"
72 if ( set -C ) >/dev
/null
2>&1; then
75 # For heaven's sake auto-redirect on SunOS/Solaris
76 if [ -f /usr
/xpg
4/bin
/sh
] && [ -x /usr
/xpg
4/bin
/sh
]; then
77 exec /usr
/xpg
4/bin
/sh
"${0}" "${@}"
79 synopsis
1 'Not a sh(1)ell with "set -C" (for save temporary file creation)'
83 find_awk || synopsis
1 'Cannot find a usable awk(1) implementation'
85 while getopts hvtT
:c i
; do
92 [ x
!= x
"${T}" ] && synopsis
${EX_USAGE} '-toc line expansion yet defined'
95 [ x
!= x
"${T}" ] && synopsis
${EX_USAGE} '-toc line expansion yet defined'
99 *) synopsis
${EX_USAGE} "Invalid -T argument: -- ${OPTARG}";;
104 synopsis
${EX_USAGE} '';;
107 [ -n "${TT}" ] && [ -z "${T}" ] && synopsis ${EX_USAGE} '-c requires -t or -T'
108 OPTIND=`expr ${OPTIND} - 1`
111 [ ${#} -gt 1 ] && synopsis ${EX_USAGE} 'Excess arguments given'
112 [ ${#} -eq 0 ] && F=- || F=${1}
116 # awk(1) doesn't support signal handlers, which means that, when we're part of
117 # a pipe which the user terminates, we are unable to deal with the broken
118 # pipe case that our END{} handler will generate when we had to perform any
119 # preprocessing, and that in turn would result in a dangling temporary file!
120 # Thus the only sane option seems to be to always create the temporary file,
121 # whether we need it or not, not to exec(1) awk(1) but keep on running the shell
122 # in order to remove the temporary after awk(1) has finished, whichever way.
129 # for i; do -- new in POSIX Issue 7 + TC1
132 [ -d "${tmpdir}" ] && return 0
135 [ -d "${tmpdir}" ] && return 0
136 echo >&2 'Cannot find a usable temporary directory, please set $TMPDIR'
142 [ ${V} -gt 1 ] && max=3
144 # RW by user only, avoid overwriting of existing files
148 tmpfile="${tmpdir}/mdocmx-
${i}.mx
"
152 ) >/dev/null 2>&1 && break
154 if [ ${i} -gt ${max} ]; then
155 echo >&2 'Cannot create a temporary file within '"${tmpdir}"
159 trap "exit ${EX_TEMPFAIL}" HUP INT QUIT PIPE TERM
160 trap "trap \"\" HUP INT QUIT PIPE TERM EXIT
; rm -f ${tmpfile}" EXIT
163 # Let's go awk(1) {{{
165 ${AWK} -v VERBOSE=${V} -v TOC="${T}" -v TOCTYPE="${TT}" -v MX_FO="${tmpfile}" \
166 -v EX_USAGE
="${EX_USAGE}" -v EX_DATAERR
="${EX_DATAERR}" \
168 # The mdoc macros that support referenceable anchors.
169 # .Sh and .Ss also create anchors, but since they do not require .Mx they are
170 # treated special and handled directly -- update manual on change!
171 UMACS = "Ar Cd Cm Dv Er Ev Fl Fn Fo Ic In Pa Va Vt"
173 # Some of those impose special rules for their arguments; mdocmx(1) solves
174 # this by outsourcing such desires in argument parse preparation hooks
175 UMACS_KEYHOOKS = "Fl Fn"
177 # A list of all mdoc commands; taken from mdocml, "mdoc.c,v 1.226 2014/10/16"
179 "%A %B %C %D %I %J %N %O %P %Q %R %T %U %V " \
180 "Ac Ad An Ao Ap Aq Ar At Bc Bd " \
181 "Bf Bk Bl Bo Bq Brc Bro Brq Bsx Bt Bx " \
182 "Cd Cm D1 Db Dc Dd Dl Do Dq Dt Dv Dx " \
183 "Ec Ed Ef Ek El Em En Eo Er Es Ev Ex Fa Fc Fd Fl Fn Fo Fr Ft Fx " \
187 "Ms Mt Nd Nm No Ns Nx " \
188 "Oc Oo Op Os Ot Ox Pa Pc Pf Po Pp Pq " \
189 "Qc Ql Qo Qq Re Rs Rv " \
190 "Sc Sh Sm So Sq Ss St Sx Sy Ta Tn " \
191 "Ud Ux Va Vt Xc Xo Xr " \
194 # Punctuation to be ignored (without changing current mode)
195 UPUNCTS = ". , : ; ( ) [ ] ? !"
199 i = split(UMACS, savea)
200 for(j = 1; j <= i; ++j){
205 i = split(UMACS_KEYHOOKS, savea)
206 for(j = 1; j <= i; ++j){
211 i = split(UCOMMS, savea)
212 for(j = 1; j <= i; ++j){
217 i = split(UPUNCTS, savea)
218 for(j = 1; j <= i; ++j){
223 mx_bypass = 0 # No work if parsing already preprocessed file!
225 mx_nlcont = "" # Line continuation in progress? (Then: data so far)
226 mx_nlcontfun = 0 # Which function to call once line complete
227 NLCONT_SH_SS_COMM = 1
229 NLCONT_MX_CHECK_LINE = 3
231 #mx_sh[] # Arrays which store headlines, and their sizes
233 #mx_sh_toc # Special avoidance of multiple TOC anchors needed, ++
236 #mx_sh_ss[] # With TOC we need relation of .Ss with its .Sh
237 #mx_fo = "" # Our temporary output fork (cleaned of .Mx)
238 #mx_macros[] # Readily prepared anchors: macros..
239 #mx_keys[] # ..: keys
240 #mx_anchors_cnt # ..number of anchors
241 #mx_stack[] # Stack of future anchors to be parsed off..
242 #mx_stack_cnt # ..number thereof
243 #mx_keystack[] # User specified ".Mx MACRO KEY": store KEY somewhere
244 #ARG, [..] # Next parsed argument (from arg_parse() helper)
248 # If we were forced to create referenceable anchors, dump the temporary file
249 # after writing our table-of-anchors (TAO :)
254 warn("At end of file: \".Mx\" stack not empty (" mx_stack_cnt " levels)")
256 for(i = 1; i <= mx_sh_cnt; ++i){
257 printf ".Mx -anchor-spass Sh \"%s\" %d\n", arg_quote(mx_sh[i]), i
258 for(j = 1; j <= mx_ss_cnt; ++j)
260 printf ".Mx -anchor-spass Ss \"%s\" %d\n",
261 arg_quote(mx_ss[j]), mx_sh_ss[j]
264 for(i = 1; i <= mx_anchors_cnt; ++i)
265 printf ".Mx -anchor-spass %s \"%s\"\n",
266 mx_macros[i], arg_quote(mx_keys[i])
268 # If we are about to produce a TOC, intercept ".Mx -toc" lines and replace
269 # them with the desired TOC content
271 while(getline < mx_fo)
274 while(getline < mx_fo){
275 if($0 ~ /^[[:space:]]*\.[[:space:]]*Mx[[:space:]]+-toc[[:space:]]*/){
276 print ".Sh \"\\*[mx-toc-name]\""
278 print ".Bl -inset", TOCTYPE
279 for(i = 1; i <= mx_sh_cnt; ++i){
280 printf ".It Sx \"%s\"\n", arg_quote(mx_sh[i])
286 # Rather illegal, but maybe we have seen .Ss yet no .Sh: go!
287 else if(TOC == "Ss" && mx_ss_cnt > 0){
288 print ".Bl -tag -compact"
289 for(i = 1; i <= mx_ss_cnt; ++i)
290 printf ".It Sx \"%s\"\n", arg_quote(mx_ss[i]) # printf, not print: expand %s like the .Sh entries do
303 if(!fal || fal == "-")
311 print "DBG@" f_a_l() ": " s > "/dev/stderr"
316 print "WARN@" f_a_l() ": mdocmx(7): " s "." > "/dev/stderr"
319 function fatal(e, s){
320 print "FATAL@" f_a_l() ": mdocmx(7): " s "." > "/dev/stderr"
324 # Dump all .Ss which belong to the .Sh with the index sh_idx, if any
325 function toc_print_ss(sh_idx){
327 for(tps_i = 1; tps_i <= mx_ss_cnt; ++tps_i){
328 tps_j = mx_sh_ss[tps_i]
336 print ".Bl -tag -offset indent -compact"
338 printf ".It Sx \"%s\"\n", arg_quote(mx_ss[tps_i])
344 # Parse the next _roff_ argument from the awk(1) line (in $0).
345 # If "no" < 0, reset the parser and return whether the former state would
346 # have parsed another argument from the line.
347 # If "no" is >0 we start at $(no); if it is 0, iterate to the next argument.
348 # Returns ARG. Only used when "hot"
349 function arg_pushback(arg){ ap_pushback = arg }
350 function arg_parse(no){
368 for(ap_i = 0; no <= NF; ++no){
371 # The good news about quotation mode is that entering it requires
372 # a preceding space: we get it almost for free with awk(1)!
376 ap_j = substr(ap_j, 2)
384 if((ap_k = index(ap_j, "\"")) != 0){
386 # The bad news on quotation mode are:
387 # - "" inside it resolves to a single "
388 # - " need not mark EOS, but any " that is not followed by "
389 # ends quotation mode and marks the beginning of the next arg
390 # - awk(1) has no goto;
391 if(ap_k == length(ap_j)){
392 ARG = ARG substr(ap_j, 1, ap_k - 1)
396 }else if(substr(ap_j, ap_k + 1, 1) == "\""){
397 ARG = ARG substr(ap_j, 1, ap_k)
398 ap_j = substr(ap_j, ap_k + 2)
400 ARG = ARG substr(ap_j, 1, ap_k)
401 ap_j = substr(ap_j, ap_k + 1)
407 }while((ap_k = index(ap_j, "\"")) > 0)
415 function arg_cleanup(arg){
416 # Deal with common special glyphs etc.
417 # Note: must be in sync with mdocmx(7) macros (mx:cleanup-string)!
418 ac_i = match(arg, /([ \t]|\\&|\\%|\\\/|\\c)+$/)
420 arg = substr(arg, 1, ac_i - 1)
421 while(arg ~ /^[ \t]/)
423 while(arg ~ /^(\\&|\\%)/ && arg !~ /^\\&\\&/)
428 function arg_quote(arg){
430 gsub("\"", "\"\"", aq_a)
435 function mx_enable(){
436 # However, are we running on an already preprocessed document? Bypass!
438 if($3 == "-preprocessed"){
445 # If we generate the TOC ourselves we had better define the string mx-toc-name!
446 # mdocml.bsd.lv (mandoc(1)) does not offer any ".if !d NAME" way, so..
447 # But even otherwise we need it, since mandoc(1) complains about unknown
448 # \*[] strings in quoted strings, and we *may* have a ".Mx -toc" anyway!
450 printf ".\\\" Uncomment for mandoc(1) compat.:\n.\\\""
451 print ".ds mx-toc-name TABLE OF CONTENTS"
455 print ".Mx -enable -preprocessed" $0
458 # Deal with a non-"-enable" ".Mx" request
460 # No argument: plain push
463 dbg(".Mx: [noarg] -> +1, stack size=" mx_stack_cnt)
468 if($2 == "-disable"){
469 # Nothing to do here (and do not check device arguments)
473 # ".Mx -ix" / ".Mx -sx" freely definable anchors
475 # Nothing to do here (xxx check argument content validity?)
477 else if($2 == "-ix"){
478 mxc_macro = arg_parse(3)
480 fatal(EX_USAGE, "\".Mx -ix\": synopsis: \".Mx -ix [category] key\"")
481 if(!(mxc_key = arg_parse(0))){
484 }else if(arg_parse(-1))
485 fatal(EX_DATAERR, "\".Mx -ix\": data after USER KEY is faulty syntax")
486 mxc_key = arg_cleanup(mxc_key)
487 dbg(".Mx -ix mac<" mxc_macro "> key <" mxc_key ">")
488 anchor_add(mxc_macro, mxc_key)
494 # With TOC creation we surely want the TOC to have an anchor, too!
496 mx_sh[++mx_sh_cnt] = "\\*[mx-toc-name]"
498 warn("\".Mx -toc\": multiple TOCs? Duplicate anchor avoided")
502 # This explicitly specifies the macro to create an anchor for next
505 warn("\".Mx\": stripping dot prefix from \"" mxc_i "\"")
506 mxc_i = substr(mxc_i, 2)
511 fatal(EX_DATAERR, "\".Mx\": macro \"" mxc_i "\" not supported")
512 mx_stack[++mx_stack_cnt] = mxc_i
513 dbg(".Mx: for next \"." mxc_i "\", stack size=" mx_stack_cnt)
515 # Do we also have a fixed key?
518 mx_keystack[mx_stack_cnt] = arg_parse(3)
519 dbg(" ... USER KEY given: <" ARG ">")
521 fatal(EX_DATAERR, "\".Mx\": data after USER KEY is faulty syntax")
524 # mx_stack_cnt is >0, check whether this line will pop the stack
525 function mx_check_line(){
526 # May be line continuation in the middle of nowhere
529 # Must be a non-comment, non-escaped macro line
530 if($0 !~ /^[[:space:]]*[.'${APOSTROPHE}'][[:space:]]*[^"#]/)
533 # Iterate over all arguments and try to classify them, comparing them against
534 # stack content as applicable
538 for(arg_parse(-1); arg_parse(0);){
539 # Solely ignore punctuation (xxx are we too stupid here?)
543 # (xxx Do this at the end of the loop instead, after decrement?)
544 if(mx_stack_cnt == 0){
545 dbg("stack empty, stopping arg processing before <" ARG ">")
549 mcl_j = mx_stack[mx_stack_cnt]
551 # Is this something we consider a macro? For convenience and documentation
552 # of roff stuff do auto-ignore a leading dot of the name in question
554 if(mcl_firstmac && ARG ~ /^\./)
563 # It may be some mdoc command nonetheless, ensure it does not fool our
564 # simpleminded processing, and end possible mcl_mac savings
567 dbg("NO POP due macro (got<" ARG "> want<" mcl_j ">)")
575 # Current command matches the one on the stack, if there is any
578 dbg("NO POP due macro (got<" mcl_i "> want<" mcl_j ">)")
585 if(!mcl_cont && !arg_parse(0))
586 fatal(EX_DATAERR, "\".Mx\": expected KEY after \"" mcl_mac "\"")
587 ARG = arg_cleanup(ARG)
589 warn("\".Mx\": KEY starting with \"\\&\\&\" will never match: " ARG)
590 if(MACS_KEYHOOKS[mcl_mac])
591 _mx_check_line_keyhook()
593 if(mx_keystack[mx_stack_cnt]){
594 mcl_i = mx_keystack[mx_stack_cnt]
596 dbg("NO POP mac<" mcl_mac "> due key (got<" ARG "> want <" mcl_i ">)")
599 delete mx_keystack[mx_stack_cnt]
604 delete mx_stack[mx_stack_cnt--]
605 dbg("POP mac<" mcl_mac "> " mcl_i " key <" ARG \
606 "> stack size=" mx_stack_cnt)
608 anchor_add(mcl_mac, ARG)
612 function _mx_check_line_keyhook(){
613 # .Fl: arguments may be continued via |, as in ".Fl a | b | c"
616 for(mclpkh_j = 0;; ++mclpkh_j){
624 warn("Premature end of \".Fl\" continuation via \"|\"")
627 # Be aware that this argument may indeed be a macro
628 # XXX However, only support another Fl as in
629 # XXX .Op Fl T | Fl t Ar \&Sh | sh | \&Ss | ss
630 # XXX We are too stupid to recursively process any possible thing,
631 # XXX more complicated recursions are simply not supported
636 ARG = arg_cleanup(ARG)
637 mclpkh_i = mclpkh_i " | " ARG
641 # .Fn: in ".Fn const char *funcname" all we want is "funcname"
642 else if(mcl_mac == "Fn"){
643 if(ARG ~ /[*&[:space:]]/){
644 mclpkh_i = match(ARG, /[^*&[:space:]]+$/)
645 ARG = arg_cleanup(substr(ARG, mclpkh_i))
650 # Add one -anchor-spass macro/key pair
651 function anchor_add(macro, key){
652 for(aa_i = 1; aa_i <= mx_anchors_cnt; ++aa_i)
653 if(mx_macros[aa_i] == macro && mx_keys[aa_i] == key){
654 warn("\".Mx\": mac<" macro ">: duplicate anchor avoided: " key)
658 mx_macros[mx_anchors_cnt] = macro
659 mx_keys[mx_anchors_cnt] = key
662 # Handle a .Sh or .Ss
663 function sh_ss_comm(){
666 for(arg_parse(-1); arg_parse(0); ++ssc_i){
673 ssc_s = arg_cleanup(ssc_s)
675 mx_sh[++mx_sh_cnt] = ssc_s
678 fatal(EX_DATAERR, ".Ss at beginning of document not allowed by mdoc(7)")
679 mx_ss[++mx_ss_cnt] = ssc_s
680 mx_sh_ss[mx_ss_cnt] = mx_sh_cnt
684 # This is our *very* primitive way of dealing with line continuation
685 function line_nlcont_add(fun){
686 mx_nlcont = mx_nlcont $0
687 mx_nlcont = substr(mx_nlcont, 1, length(mx_nlcont) - 1)
692 function line_nlcont_done(){
696 if(mx_nlcontfun == NLCONT_SH_SS_COMM)
698 else if(mx_nlcontfun == NLCONT_MX_COMM)
700 else if(mx_nlcontfun == NLCONT_MX_CHECK_LINE)
703 fatal(EX_DATAERR, "mdocmx(1) implementation error: line_nlcont_done()")
705 $0 = lnd_save # simplify callees life
708 # .Mx is a line that we care about
709 /^[[:space:]]*[.'${APOSTROPHE}'][[:space:]]*M[Xx][[:space:]]*/{
713 if(NF > 1 && $2 == "-enable")
714 fatal(EX_USAGE, "\".Mx -enable\" may be used only once")
716 fatal(EX_DATAERR, "Line continuation too complicated for mdocmx(1)")
718 line_nlcont_add(NLCONT_MX_COMM)
722 }else if(NF < 2 || $2 != "-enable")
723 fatal(EX_USAGE, "\".Mx -enable\" must be the first \".Mx\" command")
729 # .Sh and .Ss are also lines we care about, but always store the data in
730 # main memory, since those commands occur in each mdoc file
731 /^[[:space:]]*[.'${APOSTROPHE}'][[:space:]]*S[hs][[:space:]]+/{
734 fatal(EX_DATAERR, "Line continuation too complicated for mdocmx(1)")
736 line_nlcont_add(NLCONT_SH_SS_COMM)
744 # All other lines are uninteresting unless mdocmx is -enabled and we have
745 # pending anchor creation requests on the stack
750 # TODO No support for any macro END but ..
751 if(/^[[:space:]]*[.'${APOSTROPHE}'][[:space:]]*dei?1?[[:space:]]+/){
753 fatal(EX_DATAERR, "Line continuation too complicated for mdocmx(1)")
755 while(getline && $0 !~ /^\.\.$/)
758 line_nlcont_add(NLCONT_MX_CHECK_LINE)
761 else if(mx_stack_cnt)
763 else if(/^[[:space:]]*\.(\\"|[[:space:]]*$)/)