3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016,2017 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 printf '%s\n' path-to-pack[.idx|.pack] ... |
23 $(basename "$0") [option]... [pack-objects option]... [pack-base-name]
25 NOTE: The following options MUST be given before any pack-objects options:
27 --replace on success, remove the input packs, see note below
28 (but any input packs with a .keep are never removed)
--names output the 40-char hex sha1 plus '\n' to stdout for each
newly created pack, if any
33 --ignore-missing silently ignore input pack file names that do not exist
35 --ignore-missing-objects
36 silently ignore missing objects (explicit objects when
37 using --objects otherwise those contained in input packs)
39 --loose add the list of all currently existing loose objects in
40 the repository to the list of objects to pack
42 --objects input is a list of object hash id values instead of packs
44 --envok allow use of GIT_OBJECT_DIRECTORY otherwise it is an error
45 to run combine-packs.sh with GIT_OBJECT_DIRECTORY set
--sort-tags-by-id sort tags by object id rather than embedded tag name
49 using this option avoids using perl when tags are present
51 --weak-naming use perl to produce weaker object names (and likely larger
52 output packs) instead of naming with rev-list '--objects'
53 (this option requires tree objects contain 20-byte hashes)
55 If --replace is given, ALL packs to be combined MUST be located in
56 the objects/pack subdirectory of the current git directory AND the output
57 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
59 The --loose option can be used both with and without the --objects option. If
60 there are no currently existing loose objects in the repository's objects/
61 directory then it's effectively silently ignored.
63 Note that if --objects is used then --replace and --ignore-missing are invalid.
65 Unless --ignore-missing-objects is given, any input objects (either given
66 explicitly when using --objects otherwise those contained in the input packs)
67 that are not present in the current git directory (respecting the value of
68 GIT_OBJECT_DIRECTORY if --envok is given) or its alternate object
69 directories, if any, will cause combine-packs to fail.
With --ignore-missing-objects any such objects are SILENTLY SKIPPED and do NOT
appear in the output pack(s).
73 Unless the --sort-tags-by-id option is used then perl will be used if available
74 and any tag objects are present in the input. It provides the only efficient
75 way to extract the embedded tag name from a batch of tag objects reliably.
76 However, since the only reason the tag name is extracted is to sort the tag
77 objects for better tag deltification, if the tag objects are sorted by the
78 tag object id there is never any need to run perl. In practice, tag objects
79 rarely generate deltas and there are almost never enough tag objects in the
80 first place for the size savings of the almost-never-happens tag
81 deltification to matter anyway. This option will be activated automatically
82 if perl does not appear to be available.
84 Normally all commit and tree objects to be packed are 'named' using the git
85 rev-list --objects command so that the best possible pack(s) can be produced.
86 This requires that all tree objects referenced from commits and trees being
87 packed (recursively for trees) as well as all the blobs referenced by them
88 must be present in the repository or else the 'rev-list --objects' command
89 used to name them will fail. As an alternative the --weak-naming option will
90 avoid use of the '--objects' option and name the contents of tree objects
91 using a perl script. The resulting names are good, but not _as_ good which
92 may produce a less efficiently packed pack. It does, however, permit packing
93 completely arbitrarily selected objects without error.
95 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
96 the current git directory (as output by \`git rev-parse --git-dir\` or
97 by \`git rev-parse --git-common-dir\` for Git version 2.5 or later).
99 If a <pack-name> does not exist and contains no '/' characters then it is
100 retried as objects/pack/<pack-name> instead.
102 Packs to be combined MUST have an associated .idx file.
104 The pack-base-name may be a relative path name and if so, is ALWAYS relative
105 to the current git directory regardless of any GIT_OBJECT_DIRECTORY setting.
107 If not given, then the pack-base-name defaults to objects/pack/pack
108 relative to the current git directory.
110 If GIT_OBJECT_DIRECTORY is set to a non-default location (and the --envok flag
111 is given to allow it) then everywhere above where it says \"objects/\" is
112 effectively replaced with the full absolute path to \"\$GIT_OBJECT_DIRECTORY/\".
113 And, obviously, that location is no longer necessarily a subdirectory of the
114 current git directory either.
116 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
117 the ONLY option that is automatically passed (but remember that --reuse-delta
118 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
120 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
121 --stdout are forbidden. Although --keep-true-parents is allowed it should
122 not have any effect at all. Using --incremental is recommended only for
123 wizards or with --objects as in most other cases it will result in an empty
124 pack being output. The combination of --loose --objects --incremental will
125 pack up all loose objects not already in a pack (and nothing else if standard
126 input is redirected to /dev/null in which case the --objects is optional).
WARNING: the move_aside logic currently only works when pack-base-name is
omitted.
134 # $$ should be the same in subshells, but just in case, remember it
139 #line 140 "combine-packs.sh"
146 while ($count >= 32768) {
147 read(STDIN, $x, 32768);
150 read(STDIN, $x, $count) if $count;
156 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
157 my ($h, $t, $l) = ($1, $2, $3);
160 discard(1 + $l), next unless $2 eq "tag";
163 $count += length($_);
166 $tn = $1 if /^tag ([^ ]+)$/;
167 $te = $1 if /^tagger [^>]+> ([0-9]+)/;
170 discard(1 + $l - $count);
171 push(@tags, [$te, "$h $tn\n"]);
174 print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
178 #line 179 "combine-packs.sh"
186 while ($count >= 32768) {
187 $len = read(STDIN, $x, 32768);
188 defined($len) && $len == 32768 or die "bad --batch output";
192 $len = read(STDIN, $x, $count);
193 defined($len) && $len == $count or die "bad --batch output";
200 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
201 my ($h, $t, $l) = ($1, $2, $3);
202 discard(1 + $l), next unless $2 eq "tree";
205 $count = read(STDIN, $tr, $l) if $l;
206 defined($count) && $count == $l or die "bad --batch output";
209 print $ln, " ", $h, " ~~~~ \n";
212 $pos < $l && ($loc = index($tr, "\0", $pos)) > $pos && $loc + 20 < $l;
213 $pos = $loc + 20 + 1) {
214 substr($tr, $pos, $loc - $pos) =~ /^([0-7]{5,6}) (.*)$/os or die "bad --batch output";
215 my ($mode, $name) = (oct($1), $2);
216 $mode == 0100644 || $mode == 040000 or next;
219 $r =~ s/[\t\n\013\f\r ]+//gos;
220 $r = substr(reverse($r), 0, 16);
221 my $i = unpack("H*", substr($tr, $loc + 1, 20));
224 print $ln, " ", $i, " ", $r, " ", $name, "\n";
226 print $ln, " ", $i, " ", $name, "\n";
229 $pos == $l or die "bad --batch output";
241 [ -n "$td" ] && [ -e "$td/success" ] || ewf
=1
242 [ -z "$td" ] ||
! [ -e "$td" ] ||
rm -rf "$td" ||
:
243 [ -z "$gdo" ] ||
[ -z "$zap" ] ||
command find -L "$gdo/pack" -maxdepth 1 -type f
-name "*.$zap" -exec rm -f '{}' + ||
:
244 [ -z "$ewf" ] ||
echo "combine_packs: exiting with failure" >&2 ||
:
247 trap cleanup_on_exit EXIT
254 echo "combine-packs: fatal: $*" >&2 ||
:
255 # In case we are in a sub shell force the entire command to exit
256 # The trap on TERM will make sure cleanup still happens in this case
258 [ -z "$td" ] ||
! [ -s "$td/popid" ] || extrapid
="$(cat "$td/popid
")" ||
:
259 kill $cp_pid $extrapid ||
:
264 "unset" -f unalias command "$1" >/dev
/null
2>&1 ||
:
265 "unalias" -a >/dev
/null
2>&1 ||
:
269 # This extra indirection shouldn't be necessary, but it is for some broken sh
270 # in order for a failure to not prematurely exit die_on_fail with set -e active
272 # some shells do not handle "exec command ..." properly but just a
273 # plain "exec ..." has the same semantics so "command" is omitted here
280 [ -z "$td" ] ||
>"$td/failed" ||
:
281 die
"failed command ($_ec): $*"
# The utilities below are routinely used as a non-final stage of a pipeline,
# where a non-zero exit status would otherwise go unnoticed.  Each one is
# therefore shadowed by a same-named function that passes the real invocation
# (utility name plus all original arguments) to die_on_fail, so a failure
# always has consequences.
awk() {
	die_on_fail awk "$@"
}

cat() {
	die_on_fail cat "$@"
}

cut() {
	die_on_fail cut "$@"
}

find() {
	die_on_fail find "$@"
}

git() {
	die_on_fail git "$@"
}

join() {
	die_on_fail join "$@"
}

perl() {
	die_on_fail perl "$@"
}

sed() {
	die_on_fail sed "$@"
}

sort() {
	die_on_fail sort "$@"
}
# Pattern fragments assembled from $hexdig (set elsewhere in this file).
# The numeric suffix is the number of $octet repetitions the variable holds:
#   octet   = 1 octet  (two $hexdig)
#   octet4  = 4 octets
#   octet19 = 19 octets (4 * octet4 + 3 * octet)
#   octet20 = 20 octets (5 * octet4)
octet="${hexdig}${hexdig}"
octet4="${octet}${octet}${octet}${octet}"
octet19="${octet4}${octet4}${octet4}${octet4}${octet}${octet}${octet}"
octet20="${octet4}${octet4}${octet4}${octet4}${octet4}"
313 while [ $# -ge 1 ]; do case "$1" in
326 --ignore-missing-objects)
332 if [ -t 1 ] && pg
="$(git var GIT_PAGER 2>/dev/null)" && [ -n "$pg" ]; then
333 printf '%s' "${USAGE#?}" |
eval "$pg" ||
:
335 printf '%s' "${USAGE#?}" ||
:
# When an explicit object list is being read ($objectlist set), reject the
# options that only make sense for pack input.
# NOTE(review): this tests $dozap, while every other visible use of the
# --replace state goes through $zap -- confirm the option parser really sets
# $dozap, otherwise --replace combined with --objects is not rejected here.
if [ -n "$objectlist" ] && [ -n "$ignoremiss$dozap" ]; then
	die "invalid options"
fi
# Look for perl unless it has already been ruled out ($noperl set) and it is
# not needed for weak naming.  If cmd_path cannot supply a non-empty result,
# fall back to running without perl by setting $noperl.
if [ -z "$noperl" ] || [ -n "$weak" ]; then
	if perlbin="$(cmd_path perl)" && [ -n "$perlbin" ]; then
		:
	else
		noperl=1
	fi
fi

# Weak naming has no perl-free fallback, so a missing perl is fatal here.
if [ -n "$weak" ] && [ -z "$perlbin" ]; then
	die "--weak-naming requires perl"
fi
# Object replacement must never substitute for the objects actually asked
# for, so disable it for every git command run from this point on.
GIT_NO_REPLACE_OBJECTS=1
export GIT_NO_REPLACE_OBJECTS

# Find the git directory; an empty answer is treated like a failed command.
if gd="$(git rev-parse --git-dir)" && [ -n "$gd" ]; then
	:
else
	die "git rev-parse --git-dir failed"
fi

# Capture the version string and drop its leading "git version " (or
# "Git version ") prefix so only the version number itself remains.
gv="$(git --version)"
gv="${gv#[Gg]it version }"
376 IFS
=.
read -r gvmaj gvmin gvpat
<<EOT
379 : "${gvmaj:=0}" "${gvmin:=0}" "${gvpat:=0}"
# git rev-list added --no-walk support in 1.5.3 which is required
381 # git cat-file added --batch-check support in 1.5.6 which is required
382 if [ $gvmaj -lt 1 ] ||
{ [ $gvmaj -eq 1 ] && [ $gvmin -lt 5 ]; } ||
383 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 5 ] && [ $gvpat -lt 6 ]; }; then
384 die
"combine-packs requires at least Git version 1.5.6"
387 gd
="$(cd "$gd" && pwd -P)" || die
"cd failed: $tmp"
388 # git rev-parse added --git-common-dir in 2.5
389 if [ $gvmaj -gt 2 ] ||
{ [ $gvmaj -eq 2 ] && [ $gvmin -ge 5 ]; }; then
390 # rev-parse --git-common-dir is broken and may give an
391 # incorrect result without a suitable current directory
393 gd
="$(cd "$gd" && cd "$
(git rev-parse
--git-common-dir)" && pwd -P)" &&
395 die
"git rev-parse --git-common-dir failed from: $tmp"
397 # gcfbf is Git Cat-File --Batch-check=Format Option :)
399 if [ $gvmaj -gt 1 ] ||
{ [ $gvmaj -eq 1 ] && [ $gvmin -gt 8 ]; } ||
400 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 8 ] && [ $gvpat -ge 5 ]; }; then
401 gcfbf
='=%(objectname) %(objecttype)'
403 # gcfbo is Git Cat-File --Buffer Option :)
405 if [ $gvmaj -gt 2 ] ||
{ [ $gvmaj -eq 2 ] && [ $gvmin -ge 6 ]; }; then
408 if [ "${GIT_OBJECT_DIRECTORY+set}" = "set" ] && [ -z "$envok" ]; then
409 # GIT_OBJECT_DIRECTORY may only be set to $gd/objects without --envok
411 if [ -n "$GIT_OBJECT_DIRECTORY" ] && [ -d "$GIT_OBJECT_DIRECTORY" ] &&
412 [ -d "$gd/objects" ] && godfp
="$(cd "$GIT_OBJECT_DIRECTORY" && pwd -P)" &&
413 gdofp
="$(cd "$gd/objects
" && pwd -P)" && [ -n "$godfp" ] && [ -n "$gdofp" ] &&
414 [ "$gdofp" = "$godfp" ]; then
417 if [ -z "$godok" ]; then
418 die
"GIT_OBJECT_DIRECTORY set to non-default location without --envok"
421 gdo
="${GIT_OBJECT_DIRECTORY:-$gd/objects}"
423 gdo
="$(cd "$gdo" && pwd -P)" || die
"cd failed: $tmp"
424 [ -d "$gdo/pack" ] || die
"no such directory: $gdo/pack"
436 --replace|
--names|
--ignore-missing|
-h|
--help|
--objects)
437 die
"invalid options"
439 --revs|
--unpacked|
--all|
--reflog|
--indexed-objects)
440 die
"forbidden pack-objects options"
447 nonopts
=$
(( $nonopts + 1 ))
450 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] ||
451 { [ $nonopts -eq 1 ] && [ -n "$lastargopt" ]; } ||
452 { [ $nonopts -eq 1 ] && [ -z "$lastarg" ]; }; then
453 die
"invalid options"
455 if [ $nonopts -eq 1 ]; then
458 packbase
="$gdo/pack/pack"
460 pbd
="$(dirname "$packbase")"
461 case "$pbd" in /*);;*)
464 [ -e "$pbd" ] && [ -d "$pbd" ] || die
"no such directory: $(dirname "$packbase")"
465 packbase
="$(cd "$pbd" && pwd -P)/$(basename "$packbase")"
466 pbd
="$(dirname "$packbase")"
467 [ -e "$pbd" ] && [ -d "$pbd" ] || die
"internal failure realpathing: $packbase"
468 packbasecheck
="$packbase"
469 case "$packbase" in "$gd"/?
*)
470 packbase
="${packbase#$gd/}"
472 [ $nonopts -eq 1 ] || packbasearg
="$packbase"
473 [ -z "$zap" ] ||
[ -n "$packbasearg" ] || die
"--replace does not allow specifying pack-base"
474 if [ -n "$zap" ] && [ "$(dirname "$packbasecheck")" != "$gdo/pack" ] ; then
475 die
"--replace and pack base dir not <git-dir-objects>/pack" >&2
478 td
="$(mktemp -d "$gd/cmbnpcks-XXXXXX
")"
479 tdmin
="$(basename "$td")"
484 success
="$td/success"
491 trbl
="$tdmin/treesblobs"
493 named2
="$tdmin/named2"
499 _name
="$gdo/pack/pack-$_name"
502 _name
="${_name%.idx}"
505 _name
="${_name%.pack}"
508 if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
509 case "$_name" in */*) :;; *)
510 _name
="$gdo/pack/$_name"
513 if ! [ -f "$_name.idx" ] ||
! [ -s "$_name.idx" ] ||
514 ! [ -f "$_name.pack" ] ||
! [ -s "$_name.pack" ]; then
515 [ -z "$ignoremiss" ] ||
return 0
516 die
"no such pack found matching: $1" >&2
518 _name
="$(cd "$
(dirname "$_name")" && pwd -P)/$(basename "$_name")"
519 if ! [ -f "$_name.idx" ] ||
! [ -s "$_name.idx" ] ||
520 ! [ -f "$_name.pack" ] ||
! [ -s "$_name.pack" ]; then
521 die
"internal failure realpathing: $1" >&2
524 case "$(dirname "$_name")" in "$gd"/?
*)
525 _name
="${_name#$gd/}"
527 if [ -n "$zap" ] && [ "$(dirname "$_namecheck")" != "$gdo/pack" ]; then
528 die
"--replace and pack not in <git-dir-objects>/pack: $1" >&2
534 # add "old" prefix to passed in existing files, but be careful to hard-link
535 # ALL the files to be renamed to the renamed name BEFORE removing anything
539 ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")"
542 if [ -f "$_f" ]; then
550 list_loose_objects
() (
551 cd "$gdo" ||
return 1
552 objdirs
="$(echo $octet)"
553 [ "$objdirs" != "$octet" ] ||
return 0
554 find -L $objdirs -mindepth 1 -maxdepth 1 -type f
-name "$octet19*" -print |
sed 's,/,,'
563 if [ -n "$objectlist" ]; then
564 gcf
='git cat-file $gcfbo --batch-check"$gcfbf"'
565 [ -z "$looselist" ] || gcf
='{ list_loose_objects && cat; } | '"$gcf"
568 [ -z "$zap" ] ||
command find -L "$gdo/pack" -maxdepth 1 -type f
-name "*.$zap" -exec rm -f '{}' + ||
:
570 [ -z "$looselist" ] || list_loose_objects
571 while IFS
=': ' read -r packraw junk
; do
572 pack
="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack
: $packraw")"
573 if [ -n "$pack" ]; then
574 [ -z "$zap" ] ||
[ -e "$pack.keep" ] ||
>"$pack.$zap"
575 git show-index
<"$pack.idx"
577 done | cut
-d ' ' -f 2
578 } | git cat-file
$gcfbo --batch-check"$gcfbf"
580 if ($2=="tree") print $1
581 else if ($2=="blob") print $1 >"'"$bl"'"
582 else if ($2=="commit") print $1 >"'"$cm"'"
583 else if ($2=="tag") print $1 >"'"$tg"'"
584 else if ($2=="missing") print $1 >"'"$ms"'"
586 [ -n "$missok" ] ||
! [ -s "$ms" ] || die
"missing" $
(wc -l <"$ms") "object(s)"
587 echo "g" |
cat "$tr" "$bl" - |
sort -u >"$trbl"
588 if [ -z "$weak" ]; then
589 git rev-list
--no-walk --objects --stdin <"$cm" |
591 if ($1!=$0) print NR " " $0
592 else print $0 >"'"$cmo"'"
595 join -t " " -1 2 - "$trbl" >"$named"
596 join -t " " -v 1 "$tr" "$named" |
597 git rev-list
--no-walk --objects --stdin |
598 awk '{print NR " " $0}' |
600 join -t " " -1 2 - "$trbl" >"$named2"
602 ! [ -s "$cm" ] || git rev-list
--no-walk --stdin <"$cm" >"$cmo"
603 git cat-file
$gcfbo --batch <"$tr" |
604 perl
-e "$perlnameprog" |
606 join -t " " -1 2 - "$trbl" >"$named"
608 pocmd
='git pack-objects --delta-base-offset "$@"'
609 [ -z "$packbasearg" ] || pocmd
="$pocmd \"${packbasearg}tmp\""
613 if [ -n "$noperl" ]; then
616 git cat-file
$gcfbo --batch <"$tg" | perl
-e "$perltagprog"
619 if [ -z "$weak" ]; then
621 join -t " " "$named" "$tr" |
623 join -t " " "$named2" "$tr" |
625 } |
sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
627 join -t " " -v 1 "$named" "$tr" |
629 join -t " " -v 1 "$named2" "$tr" |
633 nm = substr($0, length($1) + length($2) + 3)
635 gsub(/[\t\n\013\f\r ]+/, "", sfx)
637 if (length(sfx) > 16) sfx = substr(sfx, length(sfx) - 15)
638 else if (length(sfx) < 16) sfx = sprintf("%16s", sfx)
640 r = c[16] c[15] c[14] c[13] c[12] c[11] c[10] c[9] c[8] c[7] c[6] c[5] c[4] c[3] c[2] c[1]
642 print NR " " $1 " " r " " nm
643 } else print NR " " $1 " " nm
644 } else print NR " " $1 " "
645 }' |
sort -t " " -k3,3 -k1,1n |
awk -F '[ ]' '{
647 nm = substr($0, length($1) + length($2) + length($3) + 4)
653 join -t " " "$named" "$tr" |
654 sort -t " " -k3,3 -k2,2n
655 join -t " " -v 1 "$named" "$tr" |
656 sort -t " " -k3,3 -k2,2n
659 nm = substr($0, length($1) + length($2) + length($3) + 4)
667 sh
-c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh
"$popid" "$pocmd" "$@" ||
{
669 die
"git pack-objects failed"
674 while read -r newpack
; do
675 if [ -n "$packbasearg" ]; then
676 move_aside
"$packbasearg"-$newpack.
*
677 ln -f "${packbasearg}tmp"-$newpack.pack
"$packbasearg"-$newpack.pack
678 ln -f "${packbasearg}tmp"-$newpack.idx
"$packbasearg"-$newpack.idx
679 rm -f "${packbasearg}tmp"-$newpack.
*
681 [ -z "$names" ] ||
echo "$newpack"
683 [ $?
-eq 0 ] && ! [ -e "$failed" ] && [ -e "$listok" ] && [ -e "$packok" ] ||
684 die
"unspecified failure"
685 if [ -n "$zap" ]; then
686 (cd "$gdo" && [ -d "pack" ] && find -L "pack" -maxdepth 1 -type f
-name "*.$zap" -print) |
687 while read -r remove
; do
688 rm -f "$gdo/${remove%.$zap}".
*