combine-packs.sh: update to latest
[girocco/readme.git] / jobd / combine-packs.sh
blob21bda31d517c1f9488ca6f60cf5fe0b0425a5cb7
1 #!/bin/sh
3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Version 1.1.18
# Help text shown by the -h/--help option.  It is printed as "${USAGE#?}",
# i.e. with its first character (the newline that directly follows the
# opening quote) removed.  Because the text lives inside double quotes,
# $(basename "$0") is expanded once at assignment time and every backtick,
# double quote and dollar sign that must appear literally is backslash-escaped.
21 USAGE="
22 printf '%s\n' path-to-pack[.idx|.pack] ... |
23 $(basename "$0") [option]... [pack-objects option]... [pack-base-name]
25 NOTE: The following options MUST be given before any pack-objects options:
27 --replace on success, remove the input packs, see note below
28 (but any input packs with a .keep are never removed)
30 --names output the 40-char hex sha1 plus '\n' to stdout for each
31 newly created pack(s), if any
33 --ignore-missing silently ignore input pack file names that do not exist
35 --ignore-missing-objects
36 silently ignore missing objects (explicit objects when
37 using --objects otherwise those contained in input packs)
39 --loose add the list of all currently existing loose objects in
40 the repository to the list of objects to pack
42 --objects input is a list of object hash id values instead of packs
44 --envok allow use of GIT_OBJECT_DIRECTORY otherwise it is an error
45 to run combine-packs.sh with GIT_OBJECT_DIRECTORY set
47 --sort-tags-by-id
48 sort tags by object id rather than embedded tag name
49 using this option avoids using perl when tags are present
51 If --replace is given, ALL packs to be combined MUST be located in
52 the objects/pack subdirectory of the current git directory AND the output
53 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
55 The --loose option can be used both with and without the --objects option. If
56 there are no currently existing loose objects in the repository's objects/
57 directory then it's effectively silently ignored.
59 Note that if --objects is used then --replace and --ignore-missing are invalid.
61 Unless --ignore-missing-objects is given, any input objects (either given
62 explicitly when using --objects otherwise those contained in the input packs)
63 that are not present in the current git directory (respecting the value of
64 GIT_OBJECT_DIRECTORY if --envok is given) or its alternate object
65 directories, if any, will cause combine-packs to fail.
66 With this option any such objects are SILENTLY SKIPPED and do NOT appear in
67 the output pack(s)!
69 Unless the --sort-tags-by-id option is used then perl will be used if available
70 and any tag objects are present in the input. It provides the only efficient
71 way to extract the embedded tag name from a batch of tag objects reliably.
72 However, since the only reason the tag name is extracted is to sort the tag
73 objects for better tag deltification, if the tag objects are sorted by the
74 tag object id there is never any need to run perl. In practice, tag objects
75 rarely generate deltas and there are almost never enough tag objects in the
76 first place for the size savings of the almost-never-happens tag
77 deltification to matter anyway. This option will be activated automatically
78 if perl does not appear to be available.
80 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
81 the current git directory (as output by \`git rev-parse --git-dir\`).
83 If a <pack-name> does not exist and contains no '/' characters then it is
84 retried as objects/pack/<pack-name> instead.
86 Packs to be combined MUST have an associated .idx file.
88 The pack-base-name may be a relative path name and if so, is ALWAYS relative
89 to the current git directory regardless of any GIT_OBJECT_DIRECTORY setting.
91 If not given, then the pack-base-name defaults to objects/pack/pack
92 relative to the current git directory.
94 If GIT_OBJECT_DIRECTORY is set to a non-default location (and the --envok flag
95 is given to allow it) then everywhere above where it says \"objects/\" is
96 effectively replaced with the full absolute path to \"\$GIT_OBJECT_DIRECTORY/\".
97 And, obviously, that location is no longer necessarily a subdirectory of the
98 current git directory either.
100 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
101 the ONLY option that is automatically passed (but remember that --reuse-delta
102 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
104 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
105 --stdout are forbidden. Although --keep-true-parents is allowed it should
106 not have any effect at all. Using --incremental is recommended only for
107 wizards or with --objects as in most other cases it will result in an empty
108 pack being output. The combination of --loose --objects --incremental will
109 pack up all loose objects not already in a pack (and nothing else if standard
110 input is redirected to /dev/null in which case the --objects is optional).
112 WARNING: the move_aside logic currently only works when pack-base-name is
113 completely omitted!
# Abort on the first command failure that is not explicitly handled
116 set -e
118 # $$ should be the same in subshells, but just in case, remember it
# The saved pid is the one die() signals to stop the whole run.
119 cp_pid=$$
# Perl program (run as: perl -e "$perlprog") that reads the output of
# "git cat-file --batch" on stdin.  Each record is a "<hash> <type> <size>"
# header line followed by <size> bytes of content and one newline.
# Records that are not tags are skipped.  For tags, the header lines of the
# tag object are scanned for the "tag <name>" and "tagger ... <epoch>" fields.
# The output is one "<hash> <tagname>" line per tag, ordered by tagger
# timestamp, newest first (tags without a tagger line sort as timestamp 0).
#
# The program text must not contain a single quote since it is kept inside a
# single-quoted shell string.
perlprog='
#!/usr/bin/perl
#line 100 "combine-packs.sh"
use strict;
use warnings;

sub discard {
	my $count = shift;
	my $x = "";
	while ($count >= 32768) {
		read(STDIN, $x, 32768);
		$count -= 32768;
	}
	read(STDIN, $x, $count) if $count;
}

my @tags = ();
binmode STDIN;
while (<STDIN>) {
	if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
		my ($h, $t, $l) = ($1, $2, $3);
		my $te = 0;
		my $tn = "";
		discard(1 + $l), next unless $2 eq "tag";
		my $count = 0;
		while (<STDIN>) {
			$count += length($_);
			chomp;
			last if /^$/;
			$tn = $1 if /^tag ([^ ]+)$/;
			$te = $1 if /^tagger [^>]+> ([0-9]+)/;
			last if $tn && $te;
		}
		discard(1 + $l - $count);
		push(@tags, [$te, "$h $tn\n"]);
	}
}
print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
'
# On some broken platforms running xargs without -r and empty input runs the
# command.  Probe for that: with empty input a well-behaved xargs prints
# nothing, while a broken one runs "echo -r" so that xargs_r becomes "-r".
xargs_r="$(: | command xargs echo -r)"

# Some platforms' broken xargs runs the command always at least once even if
# there's no input unless given a special option.  Automatically supply the
# option on those platforms by providing an xargs function.
# $xargs_r is deliberately unquoted so that an empty value adds no argument.
xargs() { command xargs $xargs_r "$@"; }
# Suffix of the per-run "*.<zap>" marker files kept in the pack directory
# when --replace is active; stays empty otherwise
zap=

# Absolute path of the object directory; stays empty until it is resolved
gdo=

# EXIT handler.
# Globals read: td (temporary work directory), gdo, zap
# Removes the temporary work directory and any leftover "*.$zap" marker files
# from "$gdo/pack".  A failure notice is written to stderr unless the file
# "$td/success" exists at the time of exit.  Every step is best effort.
cleanup_on_exit() {
	ewf=
	[ -n "$td" ] && [ -e "$td/success" ] || ewf=1
	[ -z "$td" ] || ! [ -e "$td" ] || rm -rf "$td" || :
	[ -z "$gdo" ] || [ -z "$zap" ] ||
		command find "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -print0 | xargs -0 rm -f || :
	# same "combine-packs:" prefix as every other diagnostic of this script
	[ -z "$ewf" ] || echo "combine-packs: exiting with failure" >&2 || :
}

# Turn the usual termination signals into a normal exit (128 + signal number)
# so that the EXIT handler still gets to clean up
trap cleanup_on_exit EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 143' TERM
# Report a fatal error on stderr and terminate the whole run with status 1.
# Arguments: the message words (joined with spaces)
# Globals read: cp_pid (pid saved at startup), td (temporary work directory)
die() {
	printf '%s\n' "combine-packs: fatal: $*" >&2 || :
	# In case we are in a sub shell force the entire command to exit
	# The trap on TERM will make sure cleanup still happens in this case
	extrapid=
	# "$td/popid", when present and non-empty, holds one more pid to signal.
	# "command cat" is used so that a read failure cannot re-enter die()
	# through a cat() wrapper function.
	[ -z "$td" ] || [ ! -s "$td/popid" ] || extrapid=$(command cat "$td/popid" || :)
	# unquoted on purpose: an empty $extrapid must not become an argument
	kill $cp_pid $extrapid || :
	exit 1
}
# Print what "command -v" reports for the command named by $1 once any
# function or alias of that name is out of the way; the status is that of
# "command -v".
# The body runs in a subshell, so the unset/unalias calls do not affect the
# caller.  The quoted command words keep aliases of those words from being
# expanded, and "unset", "unalias" and "command" themselves are stripped of
# function overrides first.  All diagnostics are discarded.
cmd_path() (
	"unset" -f unalias command "$1" >/dev/null 2>&1 || :
	"unalias" -a >/dev/null 2>&1 || :
	"command" -v "$1"
) 2>/dev/null
# This extra indirection shouldn't be necessary, but it is for some broken sh
# in order for a failure to not prematurely exit die_on_fail with set -e active
#
# Runs "$@" as an external command with LC_ALL=C, inside a subshell that is
# replaced by the command; the status is that of the command.
do_command() (
	# some shells do not handle "exec command ..." properly but just a
	# plain "exec ..." has the same semantics so "command" is omitted here
	LC_ALL=C exec "$@"
)
# Run "$@" through do_command and turn any failure into a fatal error.
# On failure the marker file "$td/failed" is created first (when a temporary
# work directory is known), then die is called with the exit status and the
# command line in the message.
die_on_fail() {
	do_command "$@" || {
		_ec=$?
		[ -z "$td" ] || : >"$td/failed" || :
		die "failed command ($_ec): $*"
	}
}
# These commands may be the non-final member of a pipe and
# MUST NOT be allowed to silently fail without consequence
#
# Each wrapper shadows the external command of the same name and hands it to
# die_on_fail, so from here on a plain "sort", "git", ... aborts the run when
# the command fails.
awk() { die_on_fail awk "$@"; }
cat() { die_on_fail cat "$@"; }
cut() { die_on_fail cut "$@"; }
find() { die_on_fail find "$@"; }
git() { die_on_fail git "$@"; }
join() { die_on_fail join "$@"; }
perl() { die_on_fail perl "$@"; }
sed() { die_on_fail sed "$@"; }
sort() { die_on_fail sort "$@"; }
# Shell glob patterns built by repetition: $octet matches one byte written as
# two lowercase hex digits, $octet4 four of them, $octet19 a 38-digit and
# $octet20 a 40-digit hex string.  They must be expanded unquoted to act as
# patterns.
octet='[0-9a-f][0-9a-f]'
octet4="$octet$octet$octet$octet"
octet19="$octet4$octet4$octet4$octet4$octet$octet$octet"
octet20="$octet4$octet4$octet4$octet4$octet4"
# Option flags: empty means "not given"; each is set by the option parsing
# loop below.
237 names=
238 ignoremiss=
239 looselist=
240 objectlist=
241 dozap=
242 envok=
243 missok=
244 noperl=
# Consume this script's own options from the front of the argument list.
# The loop stops (break) at the first argument that is none of them, so
# everything that is left in "$@" afterwards is for git pack-objects.
246 while [ $# -ge 1 ]; do case "$1" in
247 --names)
248 names=1
249 shift
251 --replace)
# the marker suffix embeds this shell's pid
252 dozap="zap-$$"
253 shift
255 --ignore-missing)
256 ignoremiss=1
257 shift
259 --ignore-missing-objects)
260 missok=1
261 shift
263 -h|--help)
# no EXIT handler (and so no failure notice) when only showing help
264 trap - EXIT
# use the pager reported by "git var GIT_PAGER" only when stdout is a terminal
265 if [ -t 1 ] && pg="$(git var GIT_PAGER 2>/dev/null)" && [ -n "$pg" ]; then
266 printf '%s' "${USAGE#?}" | eval "$pg" || :
267 else
268 printf '%s' "${USAGE#?}" || :
270 exit 0
272 --loose)
273 looselist=1
274 shift
276 --objects)
277 objectlist=1
278 shift
280 --envok)
281 envok=1
282 shift
284 --sort-tags-by-id)
285 noperl=1
286 shift
289 break
291 esac; done
# --objects cannot be combined with --ignore-missing or --replace
292 [ -z "$ignoremiss$dozap" -o -z "$objectlist" ] || die "invalid options"
294 # Always make sure we get the specified objects
295 GIT_NO_REPLACE_OBJECTS=1
296 export GIT_NO_REPLACE_OBJECTS
# gd is the git directory as reported by git (made absolute further down)
297 gd="$(git rev-parse --git-dir)"
# Reduce the "git --version" output to its leading digits-and-dots part and
# split that at the dots into major, minor and patch numbers
298 gv="$(git --version)"
299 gv="${gv#[Gg]it version }"
300 gv="${gv%%[!0-9.]*}"
301 IFS=. read -r gvmaj gvmin gvpat <<EOT
# any component that came out empty counts as 0
304 : "${gvmaj:=0}" "${gvmin:=0}" "${gvpat:=0}"
305 # git rev-parse added --no-walk support in 1.5.3 which is required
306 # git cat-file added --batch-check support in 1.5.6 which is required
307 if [ $gvmaj -lt 1 ] || [ $gvmaj -eq 1 -a $gvmin -lt 5 ] ||
308 [ $gvmaj -eq 1 -a $gvmin -eq 5 -a $gvpat -lt 6 ]; then
309 die "combine-packs requires at least Git version 1.5.6"
311 # gcfbf is Git Cat-File --Batch-check=Format Option :)
# (only set for Git 1.8.5 and later; it is appended to --batch-check)
312 gcfbf=
313 if [ $gvmaj -gt 1 ] || [ $gvmaj -eq 1 -a $gvmin -gt 8 ] ||
314 [ $gvmaj -eq 1 -a $gvmin -eq 8 -a $gvpat -ge 5 ] ; then
315 gcfbf='=%(objectname) %(objecttype)'
317 # gcfbo is Git Cat-File --Buffer Option :)
# (only set for Git 2.6 and later)
318 gcfbo=
319 if [ $gvmaj -gt 2 ] || [ $gvmaj -eq 2 -a $gvmin -ge 6 ]; then
320 gcfbo=--buffer
# Fall back to --sort-tags-by-id behavior (noperl=1) when no perl is found
322 [ -n "$noperl" ] || perlbin="$(cmd_path perl)" && [ -n "$perlbin" ] || noperl=1
# Replace gd by its absolute physical path; tmp keeps the old value for the
# error message
323 tmp="$gd"
324 gd="$(cd "$gd" && pwd -P)" || die "cd failed: $tmp"
325 if [ "${GIT_OBJECT_DIRECTORY+set}" = "set" ] && [ -z "$envok" ]; then
326 # GIT_OBJECT_DIRECTORY may only be set to $gd/objects without --envok
# (both sides are compared as physical paths)
327 godok=
328 if [ -n "$GIT_OBJECT_DIRECTORY" ] && [ -d "$GIT_OBJECT_DIRECTORY" ] && \
329 [ -d "$gd/objects" ] && godfp="$(cd "$GIT_OBJECT_DIRECTORY" && pwd -P)" && \
330 gdofp="$(cd "$gd/objects" && pwd -P)" && [ -n "$godfp" ] && [ -n "$gdofp" ] && \
331 [ "$gdofp" = "$godfp" ]; then
332 godok=1
334 if [ -z "$godok" ]; then
335 die "GIT_OBJECT_DIRECTORY set to non-default location without --envok"
# gdo is the object directory in use, as an absolute physical path; it must
# contain a pack subdirectory
338 gdo="${GIT_OBJECT_DIRECTORY:-$gd/objects}"
339 tmp="$gdo"
340 gdo="$(cd "$gdo" && pwd -P)" || die "cd failed: $tmp"
341 [ -d "$gdo/pack" ] || die "no such directory: $gdo/pack"
# From here on zap is set (when --replace was given), which is what lets
# cleanup_on_exit remove leftover marker files
342 zap="$dozap"
# Scan the remaining arguments (the pack-objects options plus an optional
# trailing pack base name) without consuming them.
# lastarg is the final argument, lastargopt is empty when the final argument
# was counted as a non-option, nonopts is the number of non-option arguments.
344 lastarg=
345 lastargopt=
346 packbase=
347 packbasearg=
348 nonopts=0
349 for arg; do
350 lastarg="$arg"
351 lastargopt=1
352 case "$arg" in
# this script's own options are not accepted after the first other argument
353 --replace|--names|--ignore-missing|-h|--help|--objects)
354 die "invalid options"
356 --revs|--unpacked|--all|--reflog|--indexed-objects)
357 die "forbidden pack-objects options"
# NOTE(review): the case arms between the one above and the counting code
# below are not present in this listing -- consult the complete file
363 lastargopt=
364 nonopts=$(( $nonopts + 1 ))
365 esac
366 done
# At most one non-option argument is allowed; it must come last and must not
# be empty
367 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] || [ $nonopts -eq 1 -a -n "$lastargopt" ] || \
368 [ $nonopts -eq 1 -a -z "$lastarg" ]; then
369 die "invalid options"
371 if [ $nonopts -eq 1 ]; then
372 packbase="$lastarg"
373 else
374 packbase="$gdo/pack/pack"
# Make the directory part of packbase absolute and physical; the directory
# must already exist
376 pbd="$(dirname "$packbase")"
377 [ -e "$pbd" -a -d "$pbd" ] || die "no such directory: $packbase"
378 packbase="$(cd "$(dirname "$packbase")" && pwd -P)/$(basename "$packbase")"
379 pbd="$(dirname "$packbase")"
380 [ -e "$pbd" -a -d "$pbd" ] || die "internal failure realpathing: $packbase"
# packbasecheck keeps the absolute form; packbase itself becomes relative to
# the git directory when it lies below it
381 packbasecheck="$packbase"
382 case "$packbase" in "$gd"/?*)
# NOTE(review): $gd is unquoted inside the expansion and is therefore used as
# a pattern; glob characters in the git directory path would defeat the strip
383 packbase="${packbase#$gd/}"
384 esac
# packbasearg is only set when NO pack base was given on the command line;
# it is the base name this script itself has to hand to pack-objects
385 [ $nonopts -eq 1 ] || packbasearg="$packbase"
386 [ -z "$zap" -o -n "$packbasearg" ] || die "--replace does not allow specifying pack-base"
387 if [ -n "$zap" ] && [ "$(dirname "$packbasecheck")" != "$gdo/pack" ] ; then
388 die "--replace and pack base dir not <git-dir-objects>/pack" >&2
# Private work directory created inside the git directory; cleanup_on_exit
# removes it again
391 td="$(mktemp -d "$gd/cmbnpcks-XXXXXX")"
# tdmin is just the directory's base name, i.e. its path relative to $gd
392 tdmin="$(basename "$td")"
# Status marker files, addressed by absolute path
393 failed="$td/failed"
394 listok="$td/listok"
395 packok="$td/packok"
396 popid="$td/popid"
397 success="$td/success"
# Work lists, addressed relative to the git directory (built from $tdmin)
398 cm="$tdmin/commits"
399 cmo="$tdmin/ordered"
400 tg="$tdmin/tags"
401 tr="$tdmin/trees"
402 bl="$tdmin/blobs"
403 ms="$tdmin/missing"
404 trbl="$tdmin/treesblobs"
405 named="$tdmin/named"
406 named2="$tdmin/named2"
# Succeed when both "$1.idx" and "$1.pack" are non-empty regular files.
_pack_pair_ok() {
	[ -f "$1.idx" ] && [ -s "$1.idx" ] && [ -f "$1.pack" ] && [ -s "$1.pack" ]
}

# Resolve one input pack name to the base path of its .idx/.pack pair.
# Arguments: $1 - a 40-digit hex pack id, or a path with or without a
#                 trailing .idx / .pack
# Globals read: octet20, gdo, gd, ignoremiss, zap
# Output: the base path (no extension), relative to $gd when the pack lives
#         below it and absolute otherwise.  Nothing is printed, and 0 is
#         returned, when the pack is missing and ignoremiss is set.
# Calls die when the pack is missing (ignoremiss unset) or when --replace
# is active (zap set) and the pack is not directly in "$gdo/pack".
get_pack_base() {
	_name="$1"
	case "$_name" in
		$octet20)
			_name="$gdo/pack/pack-$_name"
			;;
		*.idx)
			_name="${_name%.idx}"
			;;
		*.pack)
			_name="${_name%.pack}"
			;;
	esac
	# a bare name that does not exist as given is retried in the pack directory
	if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
		case "$_name" in */*) :;; *)
			_name="$gdo/pack/$_name"
		esac
	fi
	if ! _pack_pair_ok "$_name"; then
		[ -z "$ignoremiss" ] || return 0
		die "no such pack found matching: $1"
	fi
	# make the directory part absolute and physical
	_name="$(cd "$(dirname "$_name")" && pwd -P)/$(basename "$_name")"
	if ! _pack_pair_ok "$_name"; then
		die "internal failure realpathing: $1"
	fi
	_namecheck="$_name"
	case "$(dirname "$_name")" in "$gd"/?*)
		# "$gd" is quoted so that it is removed as literal text; unquoted it
		# would be a pattern and glob characters in the path would break it
		_name="${_name#"$gd"/}"
	esac
	if [ -n "$zap" ] && [ "$(dirname "$_namecheck")" != "$gdo/pack" ]; then
		die "--replace and pack not in <git-dir-objects>/pack: $1"
	fi
	printf '%s\n' "$_name"
	return 0
}
# add "old" prefix to passed in existing files, but be careful to hard-link
# ALL the files to be renamed to the renamed name BEFORE removing anything
#
# Arguments: any number of paths; those that are not regular files are skipped
# Returns: 0 on success, 1 as soon as a link cannot be made or an original
#          is still present after its removal was attempted
move_aside() {
	for _f; do
		[ ! -f "$_f" ] ||
			ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")" || return 1
	done
	for _f; do
		if [ -f "$_f" ]; then
			rm -f "$_f" || return 1
			# An explicit check is required here: a "!"-negated test never
			# triggers "set -e", so it would let a surviving file go unnoticed
			[ ! -f "$_f" ] || return 1
		fi
	done
	return 0
}
# Print the id of every loose object found in "$gdo", one per line.
# Globals read: gdo, octet, octet19
# Runs in a subshell, so the directory change stays private.  The candidate
# directories are whatever the two-hex-digit glob $octet expands to inside
# "$gdo"; an unexpanded glob means there are none and nothing is printed.
# Each "xx/<38 hex digits>" file path is turned into the full id by deleting
# the slash.
list_loose_objects() (
	cd "$gdo" || return 1
	objdirs="$(echo $octet)"
	[ "$objdirs" != "$octet" ] || return 0
	find $objdirs -mindepth 1 -maxdepth 1 -type f -name "$octet19" -print | sed 's,/,,'
)
# Remember the starting directory (pack names read from stdin are resolved
# from there, see the cd "$origdir" below) and then work from inside the git
# directory so that the $tdmin-relative work file names are valid.
468 origdir="$PWD"
469 cd "$gd"
# Start out with empty commit, ordered-commit, tree and blob lists
470 >"$cm"
471 >"$cmo"
472 >"$tr"
473 >"$bl"
# Stage 1: produce "<id> <type>" lines for every input object
474 if [ -n "$objectlist" ]; then
# --objects: stdin already is a list of object ids; with --loose the loose
# object ids are emitted ahead of it
475 gcf='git cat-file $gcfbo --batch-check"$gcfbf"'
476 [ -z "$looselist" ] || gcf='{ list_loose_objects && cat; } | '"$gcf"
477 eval "$gcf"
478 else
# pack mode: first remove "*.$zap" marker files left in the pack directory
479 [ -z "$zap" ] || find "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -print0 | xargs -0 rm -f || :
481 [ -z "$looselist" ] || list_loose_objects
# Each stdin line names one pack; only the part before the first colon or
# space is used, the rest is read into junk and ignored
482 while IFS=': ' read -r packraw junk; do
483 pack="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack: $packraw")"
484 if [ -n "$pack" ]; then
# with --replace, mark the pack for later removal unless it has a .keep file
485 [ -z "$zap" ] || [ -e "$pack.keep" ] || >"$pack.$zap"
486 git show-index <"$pack.idx"
# keep only the second field of the show-index output
488 done | cut -d ' ' -f 2
489 } | git cat-file $gcfbo --batch-check"$gcfbf"
# Stage 2: split by type.  Blob, commit, tag and missing ids are written to
# their own work files from inside awk; tree ids go to stdout and end up,
# sorted and de-duplicated, in $tr.
490 fi | awk '{
491 if ($2=="tree") print $1
492 else if ($2=="blob") print $1 >"'"$bl"'"
493 else if ($2=="commit") print $1 >"'"$cm"'"
494 else if ($2=="tag") print $1 >"'"$tg"'"
495 else if ($2=="missing") print $1 >"'"$ms"'"
496 }' | sort -u >"$tr"
# Missing objects are fatal unless --ignore-missing-objects was given
497 [ -n "$missok" ] || ! [ -s "$ms" ] || die "missing" $(wc -l <"$ms") "object(s)"
# $trbl: sorted, de-duplicated union of the tree ids, the blob ids and one
# extra line consisting of just "g"
498 echo "g" | cat "$tr" "$bl" - | sort -u >"$trbl"
# Stage 3: ask rev-list for the objects reachable from each input commit.
# Output lines that consist of a bare id are written to $cmo in output order;
# all other lines get their line number prepended, are sorted by object id
# and are kept only if the id is listed in $trbl.  The result is $named.
499 git rev-list --no-walk --objects --stdin <"$cm" |
500 awk '{
501 if ($1!=$0) print NR " " $0
502 else print $0 >"'"$cmo"'"
503 }' |
504 sort -t " " -k2,2 |
505 join -t " " -1 2 - "$trbl" >"$named"
# Stage 4: the same again for the input trees that have no line in $named;
# the result is $named2
506 join -t " " -v 1 "$tr" "$named" |
507 git rev-list --no-walk --objects --stdin |
508 awk '{print NR " " $0}' |
509 sort -t " " -k2,2 |
510 join -t " " -1 2 - "$trbl" >"$named2"
# pack-objects command line, kept as a string that is eval'ed later so that
# "$@" expands to the caller supplied pack-objects arguments.  When no pack
# base was given on the command line, "<packbasearg>tmp" is appended as base.
511 pocmd='git pack-objects --delta-base-offset "$@"'
512 [ -z "$packbasearg" ] || pocmd="$pocmd \"${packbasearg}tmp\""
# Generate the object list fed to pack-objects, in this order:
# the commits from $cmo, the tags, the entries of $named/$named2 that are
# trees, the remaining entries of $named/$named2 reordered by the awk/sort/awk
# stage further down, and finally the de-duplicated blob ids from $bl.
514 cat "$cmo"
# Tags: plain id order without perl, otherwise "<id> <tagname>" lines from
# the perl helper
515 ! [ -s "$tg" ] || {
516 if [ -n "$noperl" ]; then
517 sort -u "$tg"
518 else
519 git cat-file $gcfbo --batch <"$tg" | perl -e "$perlprog"
# Entries whose id is listed in $tr, ordered numerically on the second field
523 join -t " " "$named" "$tr" |
524 sort -t " " -k2,2n
525 join -t " " "$named2" "$tr" |
526 sort -t " " -k2,2n
# the sed drops the second field of each of those lines
527 } | sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
# Entries whose id is NOT listed in $tr
529 join -t " " -v 1 "$named" "$tr" |
530 sort -t " " -k2,2n
531 join -t " " -v 1 "$named2" "$tr" |
532 sort -t " " -k2,2n
# For each of those lines build a sort key from the name that follows the
# first two fields: whitespace is removed, the last 16 characters are taken
# and reversed.  Lines are then sorted by that key (ties by line number) and
# reduced to "<id> <name>" again by the second awk.
533 } | awk -F '[ ]' '{
534 if (NF >= 3) {
535 nm = substr($0, length($1) + length($2) + 3)
536 sfx = nm
537 gsub(/[\t\n\013\f\r ]+/, "", sfx)
538 if (length(sfx)) {
539 if (length(sfx) > 16) sfx = substr(sfx, length(sfx) - 15)
540 else if (length(sfx) < 16) sfx = sprintf("%16s", sfx)
541 split(sfx, c, "")
542 r = c[16] c[15] c[14] c[13] c[12] c[11] c[10] c[9] c[8] c[7] c[6] c[5] c[4] c[3] c[2] c[1]
543 sub(/[ ]+$/, "", r)
544 print NR " " $1 " " r " " nm
545 } else print NR " " $1 " " nm
546 } else print NR " " $1 " "
547 }' | sort -t " " -k3,3 -k1,1n | awk -F '[ ]' '{
548 if (NF >= 4) {
549 nm = substr($0, length($1) + length($2) + length($3) + 4)
550 print $2 " " nm
551 } else print $2 " "
553 sort -u "$bl"
# reaching this point means the whole list was produced
554 >"$listok"
555 } | {
# Run pack-objects via a helper sh that first records its own pid in $popid
# (die() signals that pid too) and then execs the command
556 sh -c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh "$popid" "$pocmd" "$@" || {
557 rm -f "$popid"
558 die "git pack-objects failed"
560 rm -f "$popid"
561 >"$packok"
# Each line read here is the name of one newly written pack
563 while read -r newpack; do
564 if [ -n "$packbasearg" ]; then
# Move same-named existing files out of the way, then link the "tmp" pack
# files to their final names and remove the "tmp" ones
565 move_aside "$packbasearg"-$newpack.*
566 ln -f "${packbasearg}tmp"-$newpack.pack "$packbasearg"-$newpack.pack
567 ln -f "${packbasearg}tmp"-$newpack.idx "$packbasearg"-$newpack.idx
568 rm -f "${packbasearg}tmp"-$newpack.*
570 [ -z "$names" ] || echo "$newpack"
571 done
# Success needs a zero pipeline status, no "failed" marker and both the
# "listok" and "packok" markers
572 [ $? -eq 0 -a ! -e "$failed" -a -e "$listok" -a -e "$packok" ] || die "unspecified failure"
# --replace: every "*.$zap" marker names an input pack whose files are now
# removed (the marker itself matches the glob and goes with them)
573 if [ -n "$zap" ]; then
574 find "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -print |
575 while read -r remove; do
576 rm -f "${remove%.$zap}".*
577 done
# Tell cleanup_on_exit that the run succeeded
579 >"$success"