install.sh: update XDL_FAST_HASH warning URL again
[girocco/readme.git] / bin / format-readme
blob43680749127254eb38b686494e1bfe4a739073f8
1 #!/bin/sh
3 # format-readme -- find and format a repository's readme blob
4 # Copyright (C) 2015,2016,2017,2019 Kyle J. McKay.
5 # All rights reserved.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # Version 1.2.9
22 set -e
# usage -- write the complete help text to standard output.
# The program name shown in the text is the basename of $0.
# The text is passed as a printf argument (never as the format string) and
# is assembled from single-quoted pieces, so a literal apostrophe has to be
# spelled '\'' and the program name is spliced in with '"${0##*/}"'.
usage() {
    printf '%s' \
'Usage: '"${0##*/}"' [-e]... [-r <prefix> | -p <prefix> [-i <imgprefix>]]
           <path-to-repo.git> [<treeish>]
    -a              treat all plain text as .md
    -e              output error document on Markdown.pl errors
    -w              which blob contains the actual readme contents
    -m <maxsize>    maximum size of readme to allow (default is 32768)
    -r <prefix>     prefix non-absolute URLs with <prefix>
    -p <prefix>     prefix non-absolute URLs with <prefix>/<symlink-path>
    -i <imgprefix>  use <imgprefix> instead of <prefix> for images
    --stub          wrap the output in a full XHTML document stub

    <path-to-repo.git>  location where `git rev-parse --git-dir` works
    <treeish>           tree to use instead of "HEAD^{tree}"

With `-a` a plain text readme found with an explicit "text" (i.e. `.txt` or
`.text`) extension or no extension at all will be treated as though it had a
`.md` extension instead and run through Markdown.pl.  Adding a second `-a` will
treat unknown extension types as though they are `.md` too (not recommended).

Regardless of how many `-a` options are specified, extension types that are
known to be something other than plain text will never be treated as `.md`.

With `-e` if Markdown.pl finds validation errors, those errors will be output
together with a line-numbered source into a suitable `<pre>...</pre>` block
that can be displayed in place of the "readme" and '"${0##*/}"' exits with
success.

With `-m` the maximum allowable size of the source blob for the "readme" can be
specified.  By default the maximum size is 32768 (32K).  This must be specified
in bytes and will automatically be rounded up to a minimum of 1024 except that
a value of 0 means unlimited (not recommended).

With `-w` rather than formatting the blob, output a single line with the format:

    <hash> <size> <fmt> <name> <sympath>

Where "<hash>" is the actual blob hash, "<size>" is the size as reported by
`git ls-tree`, "<fmt>" is "md", "pod" or "txt" indicating the formatter to use
and "<name>" is the initial name and if that was a symlink the "<sympath>"
field will be present and is the relative path to the final blob.  Note that
size is still enforced and will produce an error return rather than a result if
the size is too big.  This is the one "semi-recommended" use of `-m 0`, but if
`-m` is not specified and `-w` is then `-m` defaults to 0 instead of 32K.

With `-r` all non-absolute URLs have <prefix> prefixed to them.

With `-p` all non-absolute URLs have "<prefix>/<symlink-path>" prefixed to them
where <symlink-path> is the dirname portion of the symlink if the selected
"readme" file is a symlink that needs to be followed.  If it'\''s not a symlink or
it'\''s in the same directory, the "/<symlink-path>" part is not added but the
"<prefix>" part still is.

With `-i` (which also requires either `-r` or `-p`), if the target of the link
is an image, use "<imgprefix>" instead of "<prefix>".  (If `-p` was used it
will still get the symlink path, if `-r` was used it won'\''t.)
'
}
# die <message>... -- report a fatal error and terminate.
# Writes "<program>: fatal: <message>" to standard error, where <program> is
# the basename of $0 and <message> is all arguments joined with spaces, then
# exits the shell with status 2.
die() {
    printf '%s: fatal: %s\n' "${0##*/}" "$*" >&2
    exit 2
}
# Pick the directory used for temporary files: $TMPDIR without a trailing
# slash when it names an existing writable directory, otherwise /tmp.
TMPDIR="${TMPDIR%/}"
if [ -z "$TMPDIR" ] || [ ! -d "$TMPDIR" ] || [ ! -w "$TMPDIR" ]; then
    TMPDIR="/tmp"
fi

# rmfiles holds a shell-quoted list of file *expressions* (for example the
# literal text "$tmph" "$tmpe") to be removed when the script exits.
rmfiles=

# cleanup -- remove everything listed in $rmfiles; never fails.
# The list is passed through eval so that variable references stored in it
# are expanded at cleanup time rather than at the time they were recorded.
cleanup() {
    [ -z "$rmfiles" ] || eval rm -f "$rmfiles" || :
}

# Turn the usual fatal signals into an ordinary exit with status 128+signal
# so that the EXIT trap below runs cleanup for them as well.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 134' ABRT
trap 'exit 141' PIPE
trap 'exit 143' TERM
trap 'cleanup' EXIT
# A single newline character, used as a record separator.
nl='
'
# Accumulates one "<hash> <name>" line per symlink seen in the top-level tree.
symlinks=

# Option state.  Empty means "not given"; alltxt counts the number of -a's.
alltxt=
stub=
showblob=
maxlen=32768
showerr=
addprefix=
addpath=
urlprefix=
imgprefix=
optm= optr= optp= opti=

# clampmaxlen <digits> -- print the value to use for the -m option.
# <digits> must consist of decimal digits only (the caller checks this).
# Leading zeros are removed textually first so the number is always read as
# decimal; handing "010000" or "08192" to $(( )) would be treated as octal
# (4096) or rejected outright.  A result of 0 is kept (meaning unlimited),
# anything else below 1024 is raised to 1024.
clampmaxlen() {
    _cm="${1#"${1%%[!0]*}"}"
    _cm="${_cm:-0}"
    [ "$_cm" = "0" ] || [ "$_cm" -ge 1024 ] || _cm=1024
    printf '%s\n' "$_cm"
}

# Consume leading arguments that start with "-" and are longer than one
# character; a lone "-" or the first non-option argument ends option parsing.
while [ $# -gt 0 ] && [ ${#1} -gt 1 ] && [ z"-${1#-}" = z"$1" ]; do
    case "$1" in
    "-h"|"--help")
        usage
        exit 0
        ;;
    "--")
        shift
        break
        ;;
    "-a")
        alltxt="$(( ${alltxt:-0} + 1 ))"
        shift
        ;;
    "-e")
        showerr=1
        shift
        ;;
    "-w")
        showblob=1
        shift
        ;;
    "--stub")
        stub=1
        shift
        ;;
    "-m")
        [ -z "$optm" ] || die "'-m' may only be used once"
        shift
        [ $# -ge 1 ] && [ -n "$1" ] || die "'-m' requires an argument"
        [ "${1#*[!0-9]}" = "$1" ] || die "'-m' requires a whole number argument"
        optm=1
        maxlen="$(clampmaxlen "$1")"
        shift
        ;;
    "-r")
        [ -z "$optr" ] || die "'-r' may only be used once"
        [ -z "$optp" ] || die "'-r' may not be used with '-p'"
        shift
        [ $# -ge 1 ] && [ -n "$1" ] || die "'-r' requires an argument"
        optr=1
        addprefix=1
        # Normalize to exactly one trailing slash.
        urlprefix="${1%/}/"
        shift
        ;;
    "-p")
        [ -z "$optp" ] || die "'-p' may only be used once"
        [ -z "$optr" ] || die "'-p' may not be used with '-r'"
        shift
        [ $# -ge 1 ] && [ -n "$1" ] || die "'-p' requires an argument"
        optp=1
        addprefix=1
        addpath=1
        urlprefix="${1%/}/"
        shift
        ;;
    "-i")
        [ -z "$opti" ] || die "'-i' may only be used once"
        shift
        [ $# -ge 1 ] && [ -n "$1" ] || die "'-i' requires an argument"
        opti=1
        imgprefix="${1%/}/"
        shift
        ;;
    *)
        die "unrecognized option \`$1\` (-h for help)"
        ;;
    esac
done
# With -w and no explicit -m the size limit is disabled.
[ -z "$showblob" ] || [ -n "$optm" ] || maxlen=0
[ -z "$opti" ] || [ -n "$optr$optp" ] || die "'-i' also requires either '-r' or '-p'"
[ $# -le 2 ] || die "too many arguments (-h for help)"
[ $# -ge 1 ] && [ -n "$1" ] || die "missing first argument <path-to-repo.git> (-h for help)"

# Enter the repository.  Failures from here on exit with status 2 and do
# not print a message.
projdir="$1"
[ -n "$projdir" ] && [ -d "$projdir" ] || exit 2
cd "$projdir" || exit 2
# Let git discover the repository from the current directory instead of
# honouring an inherited GIT_DIR.
unset GIT_DIR
gd="$(git rev-parse --git-dir 2>&1)" || exit 2
cd "$gd" || exit 2

# Resolve the requested tree-ish (HEAD by default) to a tree object name.
treeish="${2:-HEAD}"
tree="$(git rev-parse --quiet --verify "$treeish^{tree}" 2>/dev/null)" || exit 2
# We prefer the first file or symlink we find with
# a supported extension and then we will follow it
# if it's a relative symlink with no '.' or '..' components.
# If we don't find a supported extension, we use just plain README
# which we assume to be plain text (and we will follow a symlink).
# We prefer a markdown extension over others and any extension
# other than plain text next followed by plain text and then no extension.

# haspod is 1 when perl can load Pod::Html and empty otherwise.
haspod=
if perl -MPod::Html -e 1 >/dev/null 2>&1; then
    haspod=1
fi
# notplain <name> -- succeed (status 0) when <name> is "README" (in any
# letter case) followed by an extension belonging to a markup format that is
# known not to be plain text; return 1 for every other name.
notplain() {
    case "$1" in
    # .textile, .rdoc, .org, .creole, .mediawiki/.wiki
    # .rst, .asciidoc/.adoc/.asc
    [Rr][Ee][Aa][Dd][Mm][Ee].[Tt][Ee][Xx][Tt][Ii][Ll][Ee]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Rr][Dd][Oo][Cc]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Oo][Rr][Gg]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Cc][Rr][Ee][Oo][Ll][Ee]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Ee][Dd][Ii][Aa][Ww][Ii][Kk][Ii]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Ww][Ii][Kk][Ii]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Rr][Ss][Tt]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Aa][Ss][Cc][Ii][Ii][Dd][Oo][Cc]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Aa][Dd][Oo][Cc]|\
    [Rr][Ee][Aa][Dd][Mm][Ee].[Aa][Ss][Cc])
        return 0
        ;;
    esac
    return 1
}
# scanreadmes -- pick readme candidates from `git ls-tree -l` output.
#
# Reads "<mode> <type> <hash> <size> <name>" lines from standard input and
# records up to three candidates in global variables:
#   readmeext*  a README with a recognized extension (fmt is md, pod or txt)
#   readme*     a README without any extension (fmt is txt)
#   readmefb*   fallback: a README with one unrecognized extension
# For each candidate the suffixes mean: (none) blob hash, nm name, siz size,
# lnk "1" when the entry is a symlink, fmt formatter name.
# Every symlink seen (of any name) is appended to $symlinks as
# "<hash> <name>" followed by $nl.  Uses $haspod to decide whether .pod
# files are eligible.
scanreadmes() {
    readme= readmenm= readmelnk= readmefmt= readmesiz=
    readmefb= readmefbnm= readmefblnk= readmefbsiz=
    readmeext= readmeextnm= readmeextlnk= readmeextfmt= readmeextsiz=
    while read -r mode type hash size name; do
        # Only non-empty regular files, executables and symlinks qualify.
        [ "$mode" = "100644" ] || [ "$mode" = "100755" ] || [ "$mode" = "120000" ] || continue
        [ "$size" != "0" ] && [ "$size" != "-" ] || continue
        [ "$type" = "blob" ] || continue
        # Ignore symlinks whose target text is 1024 bytes or longer.
        [ "$mode" != "120000" ] || [ "$size" -lt 1024 ] || continue
        [ "$mode" != "120000" ] || symlinks="$symlinks$hash $name$nl"
        case "$name" in

        # Markdown extensions must match this pattern:
        # /md|rmd|mkdn?|mdwn|mdown|markdown|litcoffee/i
        [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Dd]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Rr][Mm][Dd]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Kk][Dd]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Kk][Dd][Nn]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Dd][Ww][Nn]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Dd][Oo][Ww][Nn]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Mm][Aa][Rr][Kk][Dd][Oo][Ww][Nn]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Ll][Ii][Tt][Cc][Oo][Ff][Ff][Ee][Ee])
            if [ -n "$readmeext" ]; then
                # A symlink never displaces an already chosen markdown file.
                [ "$readmeextfmt" != "md" ] || [ "$mode" != "120000" ] || continue
            fi
            readmeext="$hash"
            readmeextnm="$name"
            readmeextsiz="$size"
            readmeextlnk=
            [ "$mode" != "120000" ] || readmeextlnk=1
            readmeextfmt=md
            ;;

        [Rr][Ee][Aa][Dd][Mm][Ee].[Pp][Oo][Dd])
            [ -n "$haspod" ] || continue
            if [ -n "$readmeext" ]; then
                # Markdown always wins over pod; a pod symlink only
                # displaces a plain text choice.
                [ "$readmeextfmt" != "md" ] || continue
                [ "$readmeextfmt" = "txt" ] || [ "$mode" != "120000" ] || continue
            fi
            readmeext="$hash"
            readmeextnm="$name"
            readmeextsiz="$size"
            readmeextlnk=
            [ "$mode" != "120000" ] || readmeextlnk=1
            readmeextfmt=pod
            ;;

        [Rr][Ee][Aa][Dd][Mm][Ee].[Tt][Xx][Tt]|\
        [Rr][Ee][Aa][Dd][Mm][Ee].[Tt][Ee][Xx][Tt])
            if [ -n "$readmeext" ]; then
                # Plain text only displaces plain text, and only when the
                # new entry is not a symlink.
                [ "$readmeextfmt" = "txt" ] && [ "$mode" != "120000" ] || continue
            fi
            readmeext="$hash"
            readmeextnm="$name"
            readmeextsiz="$size"
            readmeextlnk=
            [ "$mode" != "120000" ] || readmeextlnk=1
            readmeextfmt=txt
            ;;

        [Rr][Ee][Aa][Dd][Mm][Ee])
            [ -z "$readme" ] || [ "$mode" != "120000" ] || continue
            readme="$hash"
            readmenm="$name"
            readmesiz="$size"
            readmelnk=
            [ "$mode" != "120000" ] || readmelnk=1
            readmefmt=txt
            ;;

        [Rr][Ee][Aa][Dd][Mm][Ee].?*)
            [ -z "$readmefb" ] || [ "$mode" != "120000" ] || continue
            # Exactly one "." in the name ...
            [ "${name%.*}" = "${name%%.*}" ] || continue
            # ... an extension made only of A-Za-z0-9+_ ...
            [ "${name#*.}" = "${name##*[!A-Za-z0-9+_]}" ] || continue
            # ... and no whitespace anywhere in the name.
            case "$name" in *[[:space:]]*) continue; esac
            readmefb="$hash"
            readmefbnm="$name"
            readmefbsiz="$size"
            readmefblnk=
            [ "$mode" != "120000" ] || readmefblnk=1
            ;;

        esac
    done
}

scanreadmes <<EOT
$(git ls-tree -l "$tree")
EOT
# Fold the three candidates down to one, in order of preference:
# recognized extension, then bare README, then the unknown-extension fallback.

# No bare README: promote the fallback.  Its format stays unknown (empty)
# unless the extension is ".first", which is taken to be plain text.
if [ -n "$readmefb" ] && [ -z "$readme" ]; then
    readme="$readmefb"
    readmenm="$readmefbnm"
    readmesiz="$readmefbsiz"
    readmelnk="$readmefblnk"
    readmefmt=""
    case "$readmenm" in *.[Ff][Ii][Rr][Ss][Tt])
        readmefmt=txt
    esac
fi

# No readme with a recognized extension: use the bare/fallback one.
if [ -n "$readme" ] && [ -z "$readmeext" ]; then
    readmeext="$readme"
    readmeextnm="$readmenm"
    readmeextsiz="$readmesiz"
    readmeextlnk="$readmelnk"
    readmeextfmt="$readmefmt"
fi

# Nothing usable was found.
[ -n "$readmeext" ] || exit 1

# -a: format plain text as markdown.  A second -a does the same for an
# unknown format, unless the name is known to be some other markup.
if [ "${alltxt:-0}" -gt 0 ]; then
    [ "$readmeextfmt" != "txt" ] || readmeextfmt=md
    if
        [ -z "$readmeextfmt" ] && [ "${alltxt:-0}" -gt 1 ] &&
        ! notplain "$readmeextnm"
    then
        readmeextfmt=md
    fi
fi
# issaferel <path> -- succeed only when <path> is relative and has no "."
# or ".." component; symlink targets failing this test are never followed.
issaferel() {
    case "$1" in /*) return 1; esac
    case "/$1/" in */../*|*/./*) return 1; esac
    return 0
}

# If the chosen readme is a symlink, resolve it to the blob it names.
# At most one further symlink is followed: either the target itself (when
# it has no "/") or the first path component of the target, and only when
# that name is one of the top-level symlinks collected in $symlinks.
# Any failure exits with status 1.
blobsym=
if [ -n "$readmeextlnk" ]; then
    rel="$(git cat-file blob "$readmeext" 2>/dev/null)" || exit 1
    issaferel "$rel" || exit 1
    case "$rel" in */*) :;; ?*)
        # Target is a plain name in the same directory; if that name is
        # itself a symlink, replace the target with what it points to.
        while read -r hash name; do
            if [ -n "$hash" ] && [ "$name" = "$rel" ]; then
                rel2="$(git cat-file blob "$hash" 2>/dev/null)" || exit 1
                issaferel "$rel2" || exit 1
                rel="$rel2"
                break
            fi
        done <<EOT
${symlinks%?}
EOT
    esac
    case "$rel" in *?/?*)
        # Target has a directory part; if its first component is a
        # top-level symlink, substitute that symlink's target for it.
        suffix="${rel#*/}"
        # Everything before the first "/"; taken directly so that no part
        # of the path is ever interpreted as a pattern.
        prefix="${rel%%/*}"
        while read -r hash name; do
            if [ -n "$hash" ] && [ "$name" = "$prefix" ]; then
                rel2="$(git cat-file blob "$hash" 2>/dev/null)" || exit 1
                issaferel "$rel2" || exit 1
                rel="$rel2/$suffix"
                break
            fi
        done <<EOT
${symlinks%?}
EOT
    esac
    [ -z "$showblob" ] || blobsym="$rel"
    if [ -n "$addpath" ]; then
        # -p: relative URLs are resolved against the directory that holds
        # the real readme file.
        dir="$(dirname "$rel")"
        if [ "$dir" != "." ]; then
            urlprefix="${urlprefix%/}/$dir"
            [ -z "$imgprefix" ] || imgprefix="${imgprefix%/}/$dir"
        fi
    fi
    # The final target must be a non-empty regular file in the tree.
    read -r mode type hash size name <<EOT
$(git ls-tree -l "$tree" -- "$rel")
EOT
    [ "$mode" = "100644" ] || [ "$mode" = "100755" ] || exit 1
    [ "$type" = "blob" ] || exit 1
    [ "$size" != "0" ] && [ "$size" != "-" ] || exit 1
    readmeext="$hash"
    readmeextsiz="$size"
fi
# Allow up to $maxlen (32K by default)
# But fail if it doesn't look like it's text
# A $maxlen of 0 means unlimited (but that's not recommended)
# Length is checked twice:
#  1) here based on ls-tree size
#  2) perl based on the actual blob contents size

[ "$maxlen" = "0" ] || [ "$readmeextsiz" -le "$maxlen" ] || exit 1

# -w: report which blob would be formatted and stop.  An unknown format is
# reported as txt; the symlink path is appended only when there is one.
if [ -n "$showblob" ]; then
    printf '%s %s %s %s%s\n' "$readmeext" "$readmeextsiz" \
        "${readmeextfmt:-txt}" "$readmeextnm" "${blobsym:+ $blobsym}"
    exit 0
fi

# Extra perl condition enforcing the size limit; empty when unlimited.
# $maxlen is spliced into the perl source below.
andmaxok=
[ "$maxlen" = "0" ] || andmaxok=' && length($contents) <= '"$maxlen"

# Slurp the blob through perl, which rejects input that fails its -T (text)
# test, is empty, or exceeds the size limit.  Any failure exits with status 1.
contents="$(git cat-file blob "$readmeext" | perl -e '
    use 5.008;
    use strict;
    use warnings;
    binmode STDIN, ":perlio" or exit 1
        unless grep /^perlio$/, PerlIO::get_layers(STDIN);
    exit 1 unless -T STDIN;
    undef $/;
    my $contents = <STDIN>;
    exit 1 unless defined($contents) && length($contents) > 0'"$andmaxok"';
    print $contents;
    exit 0;
' 2>/dev/null)" || exit 1
# Name of the Markdown formatter command, and a (initially empty) string of
# extra arguments for it.
mdpl='Markdown.pl' args=

# showstub <title>... -- print the opening part of a document stub.
# Feeds a one-line "# <title>" document to "$mdpl --stub" and copies its
# output up to, but not including, the first line that starts with "<h1".
showstub() {
    printf '# %s\n' "$*" | "$mdpl" --stub | LC_ALL=C awk '/^<h1/{exit}{print}'
}

# showfoot [<text>...] -- print the closing part of a document stub: the
# arguments (joined with spaces, possibly empty) immediately followed by
# "</div>", then "</body>" and "</html>" on lines of their own.
showfoot() {
    printf '%s\n' "$*</div>" "</body>" "</html>"
}
# Format the result.  An unknown format is treated as plain text.  Every
# branch ends the script itself; falling out of the case means the format
# is not supported and the script exits with status 1.
#
# Formatter options are collected in the positional parameters (the script
# arguments are no longer needed at this point) and passed on as separate
# words.  The URL prefixes come from the command line and, with -p, from a
# directory name stored in the repository, so they must never be pasted
# into a string that is handed to eval or used as perl source text.
case "${readmeextfmt:-txt}" in

md)
    # Run Markdown.pl on it
    set --
    if [ -n "$addprefix" ]; then
        set -- -r "$urlprefix"
        [ -z "$imgprefix" ] || set -- "$@" -i "$imgprefix"
    fi
    if [ -z "$showerr" ]; then
        # Without -e the formatter's status becomes the exit status and
        # its diagnostics are discarded.
        [ -z "$stub" ] || showstub "$readmeextnm"
        printf '<!-- README NAME: %s -->\n' "$readmeextnm"
        err=0
        printf '%s' "$contents" | "$mdpl" "$@" 2>/dev/null || err=$?
        [ -z "$stub" ] || showfoot
        exit "$err"
    else
        # With -e capture output and diagnostics in temporary files so
        # that an error document can be produced instead on failure.
        tmph="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$-htm-XXXXXX")"
        tmpe="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$-err-XXXXXX")"
        # Stored unexpanded; cleanup expands the names when it runs.
        rmfiles="$rmfiles "'"$tmph" "$tmpe"'
        rm -f "$tmph" "$tmpe" && >"$tmph" && >"$tmpe"
        if printf '%s' "$contents" | "$mdpl" "$@" >"$tmph" 2>"$tmpe"; then
            [ -z "$stub" ] || showstub "$readmeextnm"
            printf '<!-- README NAME: %s -->\n' "$readmeextnm"
            cat "$tmph"
            [ -z "$stub" ] || showfoot
        else
            # Error document: the escaped diagnostics, an empty line,
            # then the unvalidated formatter output with right-aligned
            # line numbers, all inside a single <pre> block.
            [ -s "$tmpe" ] || echo "unknown errors" >"$tmpe"
            [ -z "$stub" ] || showstub "$readmeextnm errors"
            printf '<!-- README NAME: %s errors -->\n<pre>' "$readmeextnm"
            <"$tmpe" LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
            echo ""
            printf '%s' "$contents" |
            "$mdpl" --no-sanitize --no-validate-xml "$@" 2>/dev/null >"$tmph"
            # Width of the line-number column is the number of digits
            # in the line count.
            lines="$(( $(wc -l <"$tmph")+0 ))"
            <"$tmph" LC_ALL=C awk -v w="${#lines}" \
                '{gsub(/&/,"\\&amp;");gsub(/</,"\\&lt;");printf("%*u %s\n",w,NR,$0)}'
            printf '%s' '</pre>'
            [ -z "$stub" ] || showfoot "$nl"
        fi
        exit 0
    fi
    ;;

pod)
    # Run pod2html and extract the contents
    # The optional --htmlroot option reaches pod2html through @ARGV; the
    # "--" keeps perl from parsing it as one of its own switches.
    set --
    if [ -n "$addprefix" ] && [ -n "${urlprefix%/}" ]; then
        set -- "--htmlroot=${urlprefix%/}"
    fi
    [ -z "$stub" ] || showstub "$readmeextnm"
    printf '<!-- README NAME: %s -->\n' "$readmeextnm"
    err=0
    printf '%s' "$contents" |
    perl -MPod::Html -e 'pod2html "--quiet", "--no-index", @ARGV' -- "$@" 2>/dev/null |
    perl -e '
        use strict;
        use warnings;
        undef $/;
        my $contents = <STDIN>;
        $contents =~ s,^.*<body[^>]*>\s*,,is;
        $contents =~ s,\s*</body[^>]*>.*$,,is;
        $contents =~ s,^.*<!--\s*INDEX\s+END\s*-->\s*,,is;
        $contents =~ s,^\s*(?:<p>\s*</p>\s*)+,,is;
        print $contents;
    ' || err=$?
    [ -z "$stub" ] || showfoot
    exit "$err"
    ;;

txt)
    # It's a <pre> block but we need some escaping
    [ -z "$stub" ] || showstub "$readmeextnm"
    printf '<!-- README NAME: %s -->\n' "$readmeextnm"
    printf '%s' '<pre class="plaintext">'
    printf '%s' "$contents" | LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
    printf '%s\n' '</pre>'
    [ -z "$stub" ] || showfoot
    exit 0
    ;;

esac
exit 1