3 # format-readme -- find and format a repository's readme blob
4 # Copyright (C) 2015,2016,2017,2019 Kyle J. McKay.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 'Usage: '"${0##*/}"' [-e]... [-r <prefix> | -p <prefix> [-i <imgprefix>]]
27 <path-to-repo.git> [<treeish>]
28 -a treat all plain text as .md
29 -e output error document on Markdown.pl errors
30 -w which blob contains the actual readme contents
31 -m <maxsize> maximum size of readme to allow (default is 32768)
32 -r <prefix> prefix non-absolute URLs with <prefix>
33 -p <prefix> prefix non-absolute URLs with <prefix>/<symlink-path>
34 -i <imgprefix> use <imgprefix> instead of <prefix> for images
35 --stub wrap the output in a full XHTML document stub
37 <path-to-repo.git> location where `git rev-parse --git-dir` works
38 <treeish> tree to use instead of "HEAD^{tree}"
40 With `-a` a plain text readme found with an explicit "text" (i.e. `.txt` or
41 `.text`) extension or no extension at all will be treated as though it had a
42 `.md` extension instead and run through Markdown.pl. Adding a second `-a` will
43 treat unknown extension types as though they are `.md` too (not recommended).
45 Regardless of how many `-a` options are specified, extension types that are
46 known to be something other than plain text will never be treated as `.md`.
48 With `-e` if Markdown.pl finds validation errors, those errors will be output
49 together with a line-numbered source into a suitable `<pre>...</pre>` block
50 that can be displayed in place of the "readme" and '"${0##*/}"' exits with
53 With `-m` the maximum allowable size of the source blob for the "readme" can be
54 specified. By default the maximum size is 32768 (32K). This must be specified
55 in bytes and will automatically be rounded up to a minimum of 1024 except that
56 a value of 0 means unlimited (not recommended).
58 With `-w` rather than formatting the blob, output a single line with the format:
60 <hash> <size> <fmt> <name> <sympath>
62 Where "<hash>" is the actual blob hash, "<size>" is the size as reported by
63 `git ls-tree`, "<fmt>" is "md", "pod" or "txt" indicating the formatter to use
64 and "<name>" is the initial name and if that was a symlink the "<sympath>"
65 field will be present and is the relative path to the final blob. Note that
66 size is still enforced and will produce an error return rather than a result if
67 the size is too big. This is the one "semi-recommended" use of `-m 0`, but if
68 `-m` is not specified and `-w` is then `-m` defaults to 0 instead of 32K.
70 With `-r` all non-absolute URLs have <prefix> prefixed to them.
72 With `-p` all non-absolute URLs have "<prefix>/<symlink-path>" prefixed to them
73 where <symlink-path> is the dirname portion of the symlink if the selected
74 "readme" file is a symlink that needs to be followed. If it'\''s not a symlink or
75 it'\''s in the same directory, the "/<symlink-path>" part is not added but the
76 "<prefix>" part still is.
78 With `-i` (which also requires either `-r` or `-p`), if the target of the link
79 is an image, use "<imgprefix>" instead of "<prefix>". (If `-p` was used it
80 will still get the symlink path, if `-r` was used it won'\''t.)
# die <message>...
# Write "<script-name>: fatal: <message>" to standard error and then
# terminate the script with exit status 2.  All arguments are joined
# with single spaces to form the message.
die() {
	printf '%s: fatal: %s\n' "${0##*/}" "$*" >&2
	exit 2
}
# Use /tmp for temporary files unless TMPDIR already names an existing,
# writable directory.
if [ -z "$TMPDIR" ] || [ ! -d "$TMPDIR" ] || [ ! -w "$TMPDIR" ]; then
	TMPDIR="/tmp"
fi
90 [ -z "$rmfiles" ] ||
eval rm -f "$rmfiles" ||
:
114 optm
= optr
= optp
= opti
=
115 while [ $# -gt 0 ] && [ ${#1} -gt 1 ] && [ z
"-${1#-}" = z
"$1" ]; do
126 alltxt
="$(( ${alltxt:-0} + 1 ))"
142 [ -z "$optm" ] || die
"'-m' may only be used once"
144 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-m' requires an argument"
145 [ "${1#*[!0-9]}" = "$1" ] || die
"'-m' requires a whole number argument"
148 [ $maxlen -ge 1024 ] ||
[ "$maxlen" = "0" ] || maxlen
="1024"
152 [ -z "$optr" ] || die
"'-r' may only be used once"
153 [ -z "$optp" ] || die
"'-r' may not be used with '-p'"
155 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-r' requires an argument"
162 [ -z "$optp" ] || die
"'-p' may only be used once"
163 [ -z "$optr" ] || die
"'-p' may not be used with '-r'"
165 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-p' requires an argument"
173 [ -z "$opti" ] || die
"'-i' may only be used once"
175 [ $# -ge 1 ] && [ -n "$1" ] || die
"'-i' requires an argument"
181 die
"unrecognized option \`$1\` (-h for help)"
# Sanity checks performed once option parsing has finished.

# When $showblob is set and no explicit '-m' was given, lift the size
# limit (a maxlen of 0 means unlimited).
if [ -n "$showblob" ] && [ -z "$optm" ]; then
	maxlen=0
fi

# An image prefix is only meaningful together with a URL prefix.
if [ -n "$opti" ] && [ -z "$optr$optp" ]; then
	die "'-i' also requires either '-r' or '-p'"
fi

# At most two positional arguments are accepted and the first one
# (the repository path) must be present and non-empty.
if [ $# -gt 2 ]; then
	die "too many arguments (-h for help)"
fi
if [ $# -lt 1 ] || [ -z "$1" ]; then
	die "missing first argument <path-to-repo.git> (-h for help)"
fi
190 [ -n "$projdir" ] && [ -d "$projdir" ] ||
exit 2
191 cd "$projdir" ||
exit 2
193 gd
="$(git rev-parse --git-dir 2>&1)" ||
exit 2
196 tree
="$(git rev-parse --quiet --verify "$treeish"^{tree} 2>/dev/null)" ||
exit 2
198 # We prefer the first file or symlink we find with
199 # a supported extension and then we will follow it
200 # if it's a relative symlink with no '.' or '..' components.
201 # If we don't find a supported extension, we use just plain README
202 # which we assume to be plain text (and we will follow a symlink).
203 # We prefer a markdown extension over others and any extension
204 # other than plain text next followed by plain text and then no extension.
# Probe whether perl can load the Pod::Html module; haspod is set to 1
# only when the probe succeeds and is left untouched otherwise.
if perl -MPod::Html -e 1 >/dev/null 2>&1; then
	haspod=1
fi
211 # .textile, .rdoc, .org, .creole, .mediawiki/.wiki
212 # .rst, .asciidoc/.adoc/.asc
213 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Tt
][Ee
][Xx
][Tt
][Ii
][Ll
][Ee
]|\
214 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Rr
][Dd
][Oo
][Cc
]|\
215 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Oo
][Rr
][Gg
]|\
216 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Cc
][Rr
][Ee
][Oo
][Ll
][Ee
]|\
217 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Ee
][Dd
][Ii
][Aa
][Ww
][Ii
][Kk
][Ii
]|\
218 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Ww
][Ii
][Kk
][Ii
]|\
219 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Rr
][Ss
][Tt
]|\
220 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Aa
][Ss
][Cc
][Ii
][Ii
][Dd
][Oo
][Cc
]|\
221 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Aa
][Dd
][Oo
][Cc
]|\
222 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Aa
][Ss
][Cc
])
242 while read -r mode
type hash size name
; do
243 [ "$mode" = "100644" ] ||
[ "$mode" = "100755" ] ||
[ "$mode" = "120000" ] ||
continue
244 [ "$size" != "0" ] && [ "$size" != "-" ] ||
continue
245 [ "$type" = "blob" ] ||
continue
246 [ "$mode" != "120000" ] ||
[ "$size" -lt 1024 ] ||
continue
247 [ "$mode" != "120000" ] || symlinks
="$symlinks$hash $name$nl"
250 # Markdown extensions must match this pattern:
251 # /md|rmd|mkdn?|mdwn|mdown|markdown|litcoffee/i
252 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Dd
]|\
253 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Rr
][Mm
][Dd
]|\
254 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Kk
][Dd
]|\
255 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Kk
][Dd
][Nn
]|\
256 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Dd
][Ww
][Nn
]|\
257 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Dd
][Oo
][Ww
][Nn
]|\
258 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Mm
][Aa
][Rr
][Kk
][Dd
][Oo
][Ww
][Nn
]|\
259 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Ll
][Ii
][Tt
][Cc
][Oo
][Ff
][Ff
][Ee
][Ee
])
260 if [ -n "$readmeext" ]; then
261 [ "$readmeextfmt" != "md" ] ||
[ "$mode" != "120000" ] ||
continue
267 [ "$mode" != "120000" ] || readmeextlnk
=1
271 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Pp
][Oo
][Dd
])
272 [ -n "$haspod" ] ||
continue
273 if [ -n "$readmeext" ]; then
274 [ "$readmeextfmt" != "md" ] ||
continue
275 [ "$readmeextfmt" = "txt" ] ||
[ "$mode" != "120000" ] ||
continue
281 [ "$mode" != "120000" ] || readmeextlnk
=1
285 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Tt
][Xx
][Tt
]|\
286 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].
[Tt
][Ee
][Xx
][Tt
])
287 if [ -n "$readmeext" ]; then
288 [ "$readmeextfmt" = "txt" ] && [ "$mode" != "120000" ] ||
continue
294 [ "$mode" != "120000" ] || readmeextlnk
=1
298 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
])
299 [ -z "$readme" ] ||
[ "$mode" != "120000" ] ||
continue
304 [ "$mode" != "120000" ] || readmelnk
=1
308 [Rr
][Ee
][Aa
][Dd
][Mm
][Ee
].?
*)
309 [ -z "$readmefb" ] ||
[ "$mode" != "120000" ] ||
continue
310 [ "${name%.*}" = "${name%%.*}" ] ||
continue
311 [ "${name#*.}" = "${name##*[!A-Za-z0-9+_]}" ] ||
continue
312 [ "${name%[$ws]*}" = "$name" ] ||
continue
317 [ "$mode" != "120000" ] || readmefblnk
=1
322 $(git ls-tree -l $tree)
324 if [ -n "$readmefb" ] && [ -z "$readme" ]; then
326 readmenm
="$readmefbnm"
327 readmesiz
="$readmefbsiz"
328 readmelnk
="$readmefblnk"
330 case "$readmenm" in *.
[Ff
][Ii
][Rr
][Ss
][Tt
])
334 if [ -n "$readme" ] && [ -z "$readmeext" ]; then
336 readmeextnm
="$readmenm"
337 readmeextsiz
="$readmesiz"
338 readmeextlnk
="$readmelnk"
339 readmeextfmt
="$readmefmt"
341 [ -n "$readmeext" ] ||
exit 1
342 if [ ${alltxt:-0} -gt 0 ]; then
343 [ "$readmeextfmt" != "txt" ] || readmeextfmt
=md
345 [ -z "$readmeextfmt" ] && [ ${alltxt:-0} -gt 1 ] &&
346 ! notplain
"$readmeextnm"
352 if [ -n "$readmeextlnk" ]; then
353 rel
="$(git cat-file blob $readmeext 2>/dev/null)" ||
exit 1
354 case "$rel" in /*) exit 1; esac
355 case "/$rel/" in */..
/*|
*/.
/*) exit 1; esac
356 case "$rel" in */*) :;; ?
*)
357 while read -r hash name
; do
358 if [ -n "$hash" ] && [ "$name" = "$rel" ]; then
359 rel2
="$(git cat-file blob $hash 2>/dev/null)" ||
exit 1
360 case "$rel2" in /*) exit 1; esac
361 case "/$rel2/" in */..
/*|
*/.
/*) exit 1; esac
369 case "$rel" in *?
/?
*)
371 prefix
="${rel%/$suffix}"
372 while read -r hash name
; do
373 if [ -n "$hash" ] && [ "$name" = "$prefix" ]; then
374 rel2
="$(git cat-file blob $hash 2>/dev/null)" ||
exit 1
375 case "$rel2" in /*) exit 1; esac
376 case "/$rel2/" in */..
/*|
*/.
/*) exit 1; esac
384 [ -z "$showblob" ] || blobsym
="$rel"
385 if [ -n "$addpath" ]; then
386 dir
="$(dirname "$rel")"
387 if [ "$dir" != "." ]; then
388 urlprefix
="${urlprefix%/}/$dir"
389 [ -z "$imgprefix" ] || imgprefix
="${imgprefix%/}/$dir"
392 read -r mode
type hash size name
<<EOT
393 $(git ls-tree -l $tree -- "$rel")
395 [ "$mode" = "100644" ] ||
[ "$mode" = "100755" ] ||
exit 1
396 [ "$type" = "blob" ] ||
exit 1
397 [ "$size" != "0" ] && [ "$size" != "-" ] ||
exit 1
# Size limit, first pass.
# A readme of up to $maxlen bytes is accepted (32K unless overridden);
# a $maxlen of 0 disables the limit entirely (not recommended).
# The length is verified in two places:
#   1) right here, using the size reported by ls-tree
#   2) later in perl, using the size of the actual blob contents
# (the perl step also rejects contents that do not look like text)
if [ "$maxlen" != "0" ]; then
	[ "$readmeextsiz" -le "$maxlen" ] || exit 1
fi
411 if [ -n "$showblob" ]; then
412 printf '%s %s %s %s%s\n' "$readmeext" "$readmeextsiz" \
413 "${readmeextfmt:-txt}" "$readmeextnm" "${blobsym:+ $blobsym}"
418 [ "$maxlen" = "0" ] || andmaxok
=' && length($contents) <= '"$maxlen"
419 contents
="$(git cat-file blob $readmeext | perl -e '
423 binmode STDIN, ":perlio
" or exit 1
424 unless grep /^perlio$/, PerlIO::get_layers(STDIN);
425 exit 1 unless -T STDIN;
427 my $contents = <STDIN>;
428 exit 1 unless defined($contents) && length($contents) > 0'"$andmaxok"';
431 ' 2>/dev/null)" ||
exit 1
433 mdpl
='Markdown.pl' args
=
435 printf '# %s\n' "$*" |
"$mdpl" --stub | LC_ALL
=C
awk '/^<h1/{exit}{print}'
438 printf '%s\n' "$*</div>" "</body>" "</html>"
442 case "${readmeextfmt:-txt}" in
445 # Run Markdown.pl on it
446 if [ -n "$addprefix" ]; then
447 args
="$args -r \"$urlprefix\""
448 [ -z "$imgprefix" ] || args
="$args -i \"$imgprefix\""
450 if [ ! "$showerr" ]; then
451 [ -z "$stub" ] || showstub
"$readmeextnm"
452 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
454 printf '%s' "$contents" |
eval "$mdpl $args 2>/dev/null" || err
=$?
455 [ -z "$stub" ] || showfoot
458 tmph
="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$
-htm-XXXXXX")"
459 tmpe
="$(mktemp "${TMPDIR:+$TMPDIR/}fmtrdme-$$
-err-XXXXXX")"
460 rmfiles
="$rmfiles "'"$tmph" "$tmpe"'
461 rm -f "$tmph" "$tmpe" && >"$tmph" && >"$tmpe"
462 if printf '%s' "$contents" |
eval "\"\$mdpl\" $args "'>"$tmph" 2>"$tmpe"'; then
463 [ -z "$stub" ] || showstub
"$readmeextnm"
464 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
466 [ -z "$stub" ] || showfoot
468 [ -s "$tmpe" ] ||
echo "unknown errors" >"$tmpe"
469 [ -z "$stub" ] || showstub
"$readmeextnm errors"
470 printf '<!-- README NAME: %s errors -->\n<pre>' "$readmeextnm"
# Copy the collected error text to the output, escaping it for HTML.
# '&' is rewritten first so that the ampersand introduced by "&lt;" is
# not escaped a second time.  In a sed replacement an unescaped '&'
# stands for the matched text, hence the backslash in front of it.
<"$tmpe" LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
473 printf '%s' "$contents" |
474 eval "\"\$mdpl\" --no-sanitize --no-validate-xml $args 2>/dev/null >\"$tmph\""
# Count the lines in "$tmph"; the number of digits in that count is
# used as the width of the line-number column below.
lines="$(( $(wc -l <"$tmph")+0 ))"
# Print every line of "$tmph" prefixed by its right-aligned line number,
# escaping '&' and '<' for HTML.  '&' is handled first so the "&lt;"
# entities are not escaped again.  In an awk replacement string "\\&"
# produces a literal '&' (a bare '&' would mean the matched text).
<"$tmph" LC_ALL=C awk -v w="${#lines}" \
	'{gsub(/&/,"\\&amp;");gsub(/</,"\\&lt;");printf("%*u %s\n",w,NR,$0)}'
479 [ -z "$stub" ] || showfoot
"$nl"
486 # Run pod2html and extract the contents
488 if [ -n "$addprefix" ] && [ -n "${urlprefix%/}" ]; then
489 arg
=", \"--htmlroot=${urlprefix%/}\""
491 [ -z "$stub" ] || showstub
"$readmeextnm"
492 printf '<!-- README NAME: %s -->\n' "$readmeextnm"
494 printf '%s' "$contents" | \
495 perl
-MPod::Html
-e "pod2html \"--quiet\", \"--no-index\"$arg" 2>/dev
/null | \
500 my $contents = <STDIN>;
501 $contents =~ s,^.*<body[^>]*>\s*,,is;
502 $contents =~ s,\s*</body[^>]*>.*$,,is;
503 $contents =~ s,^.*<!--\s*INDEX\s+END\s*-->\s*,,is;
504 $contents =~ s,^\s*(?:<p>\s*</p>\s*)+,,is;
507 [ -z "$stub" ] || showfoot
# Plain text is emitted verbatim inside a <pre> block, but '&' and '<'
# must be turned into entities first or the readme text would be
# interpreted as markup by the browser.
[ -z "$stub" ] || showstub "$readmeextnm"
printf '<!-- README NAME: %s -->\n' "$readmeextnm"
printf '%s' '<pre class="plaintext">'
# Escape '&' before '<' so the "&lt;" entities are not escaped again;
# the backslash keeps sed from treating '&' as "the matched text".
printf '%s' "$contents" | LC_ALL=C sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g'
printf '%s\n' '</pre>'
[ -z "$stub" ] || showfoot