3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 printf '%s\n' path-to-pack[.idx|.pack] ... |
23 $(basename "$0") [option]... [pack-objects option]... [pack-base-name]
25 NOTE: The following options MUST be given before any pack-objects options:
27 --replace on success, remove the input packs, see note below
28 (but any input packs with a .keep are never removed)
30 --names output the 40-char hex sha1 plus '\n' to stdout for each
31 newly created pack(s), if any
33 --ignore-missing silently ignore input pack file names that do not exist
35 --ignore-missing-objects
36 silently ignore missing objects (explicit objects when
37 using --objects otherwise those contained in input packs)
39 --loose add the list of all currently existing loose objects in
40 the repository to the list of objects to pack
42 --objects input is a list of object hash id values instead of packs
44 --envok allow use of GIT_OBJECT_DIRECTORY otherwise it is an error
45 to run combine-packs.sh with GIT_OBJECT_DIRECTORY set
48 sort tags by object id rather than embedded tag name
49 using this option avoids using perl when tags are present
51 If --replace is given, ALL packs to be combined MUST be located in
52 the objects/pack subdirectory of the current git directory AND the output
53 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
55 The --loose option can be used both with and without the --objects option. If
56 there are no currently existing loose objects in the repository's objects/
57 directory then it's effectively silently ignored.
59 Note that if --objects is used then --replace and --ignore-missing are invalid.
61 Unless --ignore-missing-objects is given, any input objects (either given
62 explicitly when using --objects otherwise those contained in the input packs)
63 that are not present in the current git directory (respecting the value of
64 GIT_OBJECT_DIRECTORY if --envok is given) or its alternate object
65 directories, if any, will cause combine-packs to fail.
66 With this option any such objects are SILENTLY SKIPPED and do NOT appear in
69 Unless the --sort-tags-by-id option is used then perl will be used if available
70 and any tag objects are present in the input. It provides the only efficient
71 way to extract the embedded tag name from a batch of tag objects reliably.
72 However, since the only reason the tag name is extracted is to sort the tag
73 objects for better tag deltification, if the tag objects are sorted by the
74 tag object id there is never any need to run perl. In practice, tag objects
75 rarely generate deltas and there are almost never enough tag objects in the
76 first place for the size savings of the almost-never-happens tag
77 deltification to matter anyway. This option will be activated automatically
78 if perl does not appear to be available.
80 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
81 the current git directory (as output by \`git rev-parse --git-dir\`).
83 If a <pack-name> does not exist and contains no '/' characters then it is
84 retried as objects/pack/<pack-name> instead.
86 Packs to be combined MUST have an associated .idx file.
88 The pack-base-name may be a relative path name and if so, is ALWAYS relative
89 to the current git directory regardless of any GIT_OBJECT_DIRECTORY setting.
91 If not given, then the pack-base-name defaults to objects/pack/pack
92 relative to the current git directory.
94 If GIT_OBJECT_DIRECTORY is set to a non-default location (and the --envok flag
95 is given to allow it) then everywhere above where it says \"objects/\" is
96 effectively replaced with the full absolute path to \"\$GIT_OBJECT_DIRECTORY/\".
97 And, obviously, that location is no longer necessarily a subdirectory of the
98 current git directory either.
100 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
101 the ONLY option that is automatically passed (but remember that --reuse-delta
102 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
104 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
105 --stdout are forbidden. Although --keep-true-parents is allowed it should
106 not have any effect at all. Using --incremental is recommended only for
107 wizards or with --objects as in most other cases it will result in an empty
108 pack being output. The combination of --loose --objects --incremental will
109 pack up all loose objects not already in a pack (and nothing else if standard
110 input is redirected to /dev/null in which case the --objects is optional).
112 WARNING: the move_aside logic currently only works when pack-base-name is
118 # $$ should be the same in subshells, but just in case, remember it
123 #line 100 "combine-packs.sh"
130 while ($count >= 32768) {
131 read(STDIN, $x, 32768);
134 read(STDIN, $x, $count) if $count;
140 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
141 my ($h, $t, $l) = ($1, $2, $3);
144 discard(1 + $l), next unless $2 eq "tag";
147 $count += length($_);
150 $tn = $1 if /^tag ([^ ]+)$/;
151 $te = $1 if /^tagger [^>]+> ([0-9]+)/;
154 discard(1 + $l - $count);
155 push(@tags, [$te, "$h $tn\n"]);
158 print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
# Probe how the platform's xargs behaves when given empty input.
#
# The probe feeds empty input to "xargs echo -r".  An xargs that runs its
# command at least once even with no input executes "echo -r", so the
# substitution captures "-r".  An xargs that does not run the command on
# empty input produces no output, leaving xargs_r empty.
xargs_r="$(: | command xargs echo -r)"

# Some platforms' broken xargs runs the command always at least once even if
# there's no input unless given a special option.  Automatically supply the
# option on those platforms by providing an xargs function.
#
# $xargs_r is intentionally left unquoted: when it is empty it must expand to
# no argument at all rather than to an empty-string argument.
xargs() {
	command xargs $xargs_r "$@"
}
176 [ -n "$td" ] && [ -e "$td/success" ] || ewf
=1
177 [ -z "$td" ] ||
! [ -e "$td" ] ||
rm -rf "$td" ||
:
178 [ -z "$gdo" -o -z "$zap" ] ||
command find "$gdo/pack" -maxdepth 1 -type f
-name "*.$zap" -print0 |
xargs -0 rm -f ||
:
179 [ -z "$ewf" ] ||
echo "combine_packs: exiting with failure" >&2 ||
:
182 trap cleanup_on_exit EXIT
189 echo "combine-packs: fatal: $*" >&2 ||
:
190 # In case we are in a sub shell force the entire command to exit
191 # The trap on TERM will make sure cleanup still happens in this case
193 [ -z "$td" ] ||
[ ! -s "$td/popid" ] || extrapid
=$
(cat "$td/popid" ||
:)
194 kill $cp_pid $extrapid ||
:
199 "unset" -f unalias command "$1" >/dev
/null
2>&1 ||
:
200 "unalias" -a >/dev
/null
2>&1 ||
:
204 # This extra indirection shouldn't be necessary, but it is for some broken sh
205 # in order for a failure to not prematurely exit die_on_fail with set -e active
207 # some shells do not handle "exec command ..." properly but just a
208 # plain "exec ..." has the same semantics so "command" is omitted here
215 [ -z "$td" ] ||
>"$td/failed" ||
:
216 die
"failed command ($_ec): $*"
# These commands may be the non-final member of a pipe and
# MUST NOT be allowed to silently fail without consequence
#
# Each function below shadows the utility of the same name and hands the
# utility name plus all original arguments to die_on_fail, which is defined
# elsewhere in this file.
# NOTE(review): die_on_fail presumably runs the command and aborts the whole
# script if it fails -- confirm against its definition.
awk() { die_on_fail awk "$@"; }
cat() { die_on_fail cat "$@"; }
cut() { die_on_fail cut "$@"; }
find() { die_on_fail find "$@"; }
git() { die_on_fail git "$@"; }
join() { die_on_fail join "$@"; }
perl() { die_on_fail perl "$@"; }
sed() { die_on_fail sed "$@"; }
sort() { die_on_fail sort "$@"; }
232 octet
='[0-9a-f][0-9a-f]'
233 octet4
="$octet$octet$octet$octet"
234 octet19
="$octet4$octet4$octet4$octet4$octet$octet$octet"
235 octet20
="$octet4$octet4$octet4$octet4$octet4"
246 while [ $# -ge 1 ]; do case "$1" in
259 --ignore-missing-objects)
265 if [ -t 1 ] && pg
="$(git var GIT_PAGER 2>/dev/null)" && [ -n "$pg" ]; then
266 printf '%s' "${USAGE#?}" |
eval "$pg" ||
:
268 printf '%s' "${USAGE#?}" ||
:
292 [ -z "$ignoremiss$dozap" -o -z "$objectlist" ] || die
"invalid options"
# Always make sure we get the specified objects
# (GIT_NO_REPLACE_OBJECTS is exported so every git child process sees it.)
GIT_NO_REPLACE_OBJECTS=1
export GIT_NO_REPLACE_OBJECTS

# gd: the git directory as reported by git itself.
gd="$(git rev-parse --git-dir)"

# gv: the output of "git --version" with a leading "git version " (or
# "Git version ") prefix stripped off.
gv="$(git --version)"
gv="${gv#[Gg]it version }"
301 IFS
=.
read -r gvmaj gvmin gvpat
<<EOT
304 : "${gvmaj:=0}" "${gvmin:=0}" "${gvpat:=0}"
305 # git rev-parse added --no-walk support in 1.5.3 which is required
306 # git cat-file added --batch-check support in 1.5.6 which is required
307 if [ $gvmaj -lt 1 ] ||
[ $gvmaj -eq 1 -a $gvmin -lt 5 ] ||
308 [ $gvmaj -eq 1 -a $gvmin -eq 5 -a $gvpat -lt 6 ]; then
309 die
"combine-packs requires at least Git version 1.5.6"
311 # gcfbf is Git Cat-File --Batch-check=Format Option :)
313 if [ $gvmaj -gt 1 ] ||
[ $gvmaj -eq 1 -a $gvmin -gt 8 ] ||
314 [ $gvmaj -eq 1 -a $gvmin -eq 8 -a $gvpat -ge 5 ] ; then
315 gcfbf
='=%(objectname) %(objecttype)'
317 # gcfbo is Git Cat-File --Buffer Option :)
319 if [ $gvmaj -gt 2 ] ||
[ $gvmaj -eq 2 -a $gvmin -ge 6 ]; then
322 [ -n "$noperl" ] || perlbin
="$(cmd_path perl)" && [ -n "$perlbin" ] || noperl
=1
324 gd
="$(cd "$gd" && pwd -P)" || die
"cd failed: $tmp"
325 if [ "${GIT_OBJECT_DIRECTORY+set}" = "set" ] && [ -z "$envok" ]; then
326 # GIT_OBJECT_DIRECTORY may only be set to $gd/objects without --envok
328 if [ -n "$GIT_OBJECT_DIRECTORY" ] && [ -d "$GIT_OBJECT_DIRECTORY" ] && \
329 [ -d "$gd/objects" ] && godfp
="$(cd "$GIT_OBJECT_DIRECTORY" && pwd -P)" && \
330 gdofp
="$(cd "$gd/objects
" && pwd -P)" && [ -n "$godfp" ] && [ -n "$gdofp" ] && \
331 [ "$gdofp" = "$godfp" ]; then
334 if [ -z "$godok" ]; then
335 die
"GIT_OBJECT_DIRECTORY set to non-default location without --envok"
338 gdo
="${GIT_OBJECT_DIRECTORY:-$gd/objects}"
340 gdo
="$(cd "$gdo" && pwd -P)" || die
"cd failed: $tmp"
341 [ -d "$gdo/pack" ] || die
"no such directory: $gdo/pack"
353 --replace|
--names|
--ignore-missing|
-h|
--help|
--objects)
354 die
"invalid options"
356 --revs|
--unpacked|
--all|
--reflog|
--indexed-objects)
357 die
"forbidden pack-objects options"
364 nonopts
=$
(( $nonopts + 1 ))
367 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] ||
[ $nonopts -eq 1 -a -n "$lastargopt" ] || \
368 [ $nonopts -eq 1 -a -z "$lastarg" ]; then
369 die
"invalid options"
371 if [ $nonopts -eq 1 ]; then
374 packbase
="$gdo/pack/pack"
376 pbd
="$(dirname "$packbase")"
377 [ -e "$pbd" -a -d "$pbd" ] || die
"no such directory: $packbase"
378 packbase
="$(cd "$
(dirname "$packbase")" && pwd -P)/$(basename "$packbase")"
379 pbd
="$(dirname "$packbase")"
380 [ -e "$pbd" -a -d "$pbd" ] || die
"internal failure realpathing: $packbase"
381 packbasecheck
="$packbase"
382 case "$packbase" in "$gd"/?
*)
383 packbase
="${packbase#$gd/}"
385 [ $nonopts -eq 1 ] || packbasearg
="$packbase"
386 [ -z "$zap" -o -n "$packbasearg" ] || die
"--replace does not allow specifying pack-base"
387 if [ -n "$zap" ] && [ "$(dirname "$packbasecheck")" != "$gdo/pack" ] ; then
388 die
"--replace and pack base dir not <git-dir-objects>/pack" >&2
391 td
="$(mktemp -d "$gd/cmbnpcks-XXXXXX
")"
392 tdmin
="$(basename "$td")"
397 success
="$td/success"
404 trbl
="$tdmin/treesblobs"
406 named2
="$tdmin/named2"
412 _name
="$gdo/pack/pack-$_name"
415 _name
="${_name%.idx}"
418 _name
="${_name%.pack}"
421 if ! [ -e "$_name.idx" -o -e "$_name.pack" ]; then
422 case "$_name" in */*) :;; *)
423 _name
="$gdo/pack/$_name"
426 if ! [ -f "$_name.idx" -a -s "$_name.idx" -a -f "$_name.pack" -a -s "$_name.pack" ]; then
427 [ -z "$ignoremiss" ] ||
return 0
428 die
"no such pack found matching: $1" >&2
430 _name
="$(cd "$
(dirname "$_name")" && pwd -P)/$(basename "$_name")"
431 if ! [ -f "$_name.idx" -a -s "$_name.idx" -a -f "$_name.pack" -a -s "$_name.pack" ]; then
432 die
"internal failure realpathing: $1" >&2
435 case "$(dirname "$_name")" in "$gd"/?
*)
436 _name
="${_name#$gd/}"
438 if [ -n "$zap" ] && [ "$(dirname "$_namecheck")" != "$gdo/pack" ]; then
439 die
"--replace and pack not in <git-dir-objects>/pack: $1" >&2
445 # add "old" prefix to passed in existing files, but be careful to hard-link
446 # ALL the files to be renamed to the renamed name BEFORE removing anything
450 ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")"
453 if [ -f "$_f" ]; then
# Print the object id of every loose object found under "$gdo", one per line.
#
# Reads:    gdo (object directory), octet and octet19 (hex glob patterns)
# Outputs:  each matching file's "<dir>/<file>" path with the first "/"
#           removed, i.e. the two-character directory name joined to the
#           38-character file name
# Returns:  1 if "$gdo" cannot be entered; 0 when no two-hex-digit directories
#           exist; otherwise the status of the find | sed pipeline
#
# The body is a subshell "( ... )" rather than a brace group, so the cd does
# not change the caller's working directory and objdirs does not leak out.
list_loose_objects() (
	cd "$gdo" || return 1
	# Unquoted on purpose: let the shell expand the two-hex-digit glob into
	# the list of matching directory names.
	objdirs="$(echo $octet)"
	# If nothing matched, echo printed the pattern itself: nothing to list.
	[ "$objdirs" != "$octet" ] || return 0
	# $objdirs is unquoted so each directory becomes its own find operand.
	find $objdirs -mindepth 1 -maxdepth 1 -type f -name "$octet19" -print |
	sed 's,/,,'
)
474 if [ -n "$objectlist" ]; then
475 gcf
='git cat-file $gcfbo --batch-check"$gcfbf"'
476 [ -z "$looselist" ] || gcf
='{ list_loose_objects && cat; } | '"$gcf"
479 [ -z "$zap" ] ||
find "$gdo/pack" -maxdepth 1 -type f
-name "*.$zap" -print0 |
xargs -0 rm -f ||
:
481 [ -z "$looselist" ] || list_loose_objects
482 while IFS
=': ' read -r packraw junk
; do
483 pack
="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack
: $packraw")"
484 if [ -n "$pack" ]; then
485 [ -z "$zap" ] ||
[ -e "$pack.keep" ] ||
>"$pack.$zap"
486 git show-index
<"$pack.idx"
488 done | cut
-d ' ' -f 2
489 } | git cat-file
$gcfbo --batch-check"$gcfbf"
491 if ($2=="tree") print $1
492 else if ($2=="blob") print $1 >"'"$bl"'"
493 else if ($2=="commit") print $1 >"'"$cm"'"
494 else if ($2=="tag") print $1 >"'"$tg"'"
495 else if ($2=="missing") print $1 >"'"$ms"'"
497 [ -n "$missok" ] ||
! [ -s "$ms" ] || die
"missing" $
(wc -l <"$ms") "object(s)"
498 echo "g" |
cat "$tr" "$bl" - |
sort -u >"$trbl"
499 git rev-list
--no-walk --objects --stdin <"$cm" |
501 if ($1!=$0) print NR " " $0
502 else print $0 >"'"$cmo"'"
505 join -t " " -1 2 - "$trbl" >"$named"
506 join -t " " -v 1 "$tr" "$named" |
507 git rev-list
--no-walk --objects --stdin |
508 awk '{print NR " " $0}' |
510 join -t " " -1 2 - "$trbl" >"$named2"
511 pocmd
='git pack-objects --delta-base-offset "$@"'
512 [ -z "$packbasearg" ] || pocmd
="$pocmd \"${packbasearg}tmp\""
516 if [ -n "$noperl" ]; then
519 git cat-file
$gcfbo --batch <"$tg" | perl
-e "$perlprog"
523 join -t " " "$named" "$tr" |
525 join -t " " "$named2" "$tr" |
527 } |
sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
529 join -t " " -v 1 "$named" "$tr" |
531 join -t " " -v 1 "$named2" "$tr" |
535 nm = substr($0, length($1) + length($2) + 3)
537 gsub(/[\t\n\013\f\r ]+/, "", sfx)
539 if (length(sfx) > 16) sfx = substr(sfx, length(sfx) - 15)
540 else if (length(sfx) < 16) sfx = sprintf("%16s", sfx)
542 r = c[16] c[15] c[14] c[13] c[12] c[11] c[10] c[9] c[8] c[7] c[6] c[5] c[4] c[3] c[2] c[1]
544 print NR " " $1 " " r " " nm
545 } else print NR " " $1 " " nm
546 } else print NR " " $1 " "
547 }' |
sort -t " " -k3,3 -k1,1n |
awk -F '[ ]' '{
549 nm = substr($0, length($1) + length($2) + length($3) + 4)
556 sh
-c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh
"$popid" "$pocmd" "$@" ||
{
558 die
"git pack-objects failed"
563 while read -r newpack
; do
564 if [ -n "$packbasearg" ]; then
565 move_aside
"$packbasearg"-$newpack.
*
566 ln -f "${packbasearg}tmp"-$newpack.pack
"$packbasearg"-$newpack.pack
567 ln -f "${packbasearg}tmp"-$newpack.idx
"$packbasearg"-$newpack.idx
568 rm -f "${packbasearg}tmp"-$newpack.
*
570 [ -z "$names" ] ||
echo "$newpack"
572 [ $?
-eq 0 -a ! -e "$failed" -a -e "$listok" -a -e "$packok" ] || die
"unspecified failure"
573 if [ -n "$zap" ]; then
574 find "$gdo/pack" -maxdepth 1 -type f
-name "*.$zap" -print |
575 while read -r remove
; do
576 rm -f "${remove%.$zap}".
*