scripts: purge use of test '-a' and '-o' ops and clean up
[girocco.git] / jobd / combine-packs.sh
blobdb80c6e62386067d0419dcb158c3050611a435f0
1 #!/bin/sh
3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016,2017 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Version 1.1.20
21 USAGE="
22 printf '%s\n' path-to-pack[.idx|.pack] ... |
23 $(basename "$0") [option]... [pack-objects option]... [pack-base-name]
25 NOTE: The following options MUST be given before any pack-objects options:
27 --replace on success, remove the input packs, see note below
28 (but any input packs with a .keep are never removed)
30 --names output the 40-char hex sha1 plus '\n' to stdout for each
31 newly created pack(s), if any
33 --ignore-missing silently ignore input pack file names that do not exist
35 --ignore-missing-objects
36 silently ignore missing objects (explicit objects when
37 using --objects otherwise those contained in input packs)
39 --loose add the list of all currently existing loose objects in
40 the repository to the list of objects to pack
42 --objects input is a list of object hash id values instead of packs
44 --envok allow use of GIT_OBJECT_DIRECTORY otherwise it is an error
45 to run combine-packs.sh with GIT_OBJECT_DIRECTORY set
47 --sort-tags-by-id
48 sort tags by object id rather than embedded tag name
49 using this option avoids using perl when tags are present
51 If --replace is given, ALL packs to be combined MUST be located in
52 the objects/pack subdirectory of the current git directory AND the output
53 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
55 The --loose option can be used both with and without the --objects option. If
56 there are no currently existing loose objects in the repository's objects/
57 directory then it's effectively silently ignored.
59 Note that if --objects is used then --replace and --ignore-missing are invalid.
61 Unless --ignore-missing-objects is given, any input objects (either given
62 explicitly when using --objects otherwise those contained in the input packs)
63 that are not present in the current git directory (respecting the value of
64 GIT_OBJECT_DIRECTORY if --envok is given) or its alternate object
65 directories, if any, will cause combine-packs to fail.
66 With this option any such objects are SILENTLY SKIPPED and do NOT appear in
67 the output pack(s)!
69 Unless the --sort-tags-by-id option is used then perl will be used if available
70 and any tag objects are present in the input. It provides the only efficient
71 way to extract the embedded tag name from a batch of tag objects reliably.
72 However, since the only reason the tag name is extracted is to sort the tag
73 objects for better tag deltification, if the tag objects are sorted by the
74 tag object id there is never any need to run perl. In practice, tag objects
75 rarely generate deltas and there are almost never enough tag objects in the
76 first place for the size savings of the almost-never-happens tag
77 deltification to matter anyway. This option will be activated automatically
78 if perl does not appear to be available.
80 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
81 the current git directory (as output by \`git rev-parse --git-dir\` or
82 by \`git rev-parse --git-common-dir\` for Git version 2.5 or later).
84 If a <pack-name> does not exist and contains no '/' characters then it is
85 retried as objects/pack/<pack-name> instead.
87 Packs to be combined MUST have an associated .idx file.
89 The pack-base-name may be a relative path name and if so, is ALWAYS relative
90 to the current git directory regardless of any GIT_OBJECT_DIRECTORY setting.
92 If not given, then the pack-base-name defaults to objects/pack/pack
93 relative to the current git directory.
95 If GIT_OBJECT_DIRECTORY is set to a non-default location (and the --envok flag
96 is given to allow it) then everywhere above where it says \"objects/\" is
97 effectively replaced with the full absolute path to \"\$GIT_OBJECT_DIRECTORY/\".
98 And, obviously, that location is no longer necessarily a subdirectory of the
99 current git directory either.
101 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
102 the ONLY option that is automatically passed (but remember that --reuse-delta
103 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
105 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
106 --stdout are forbidden. Although --keep-true-parents is allowed it should
107 not have any effect at all. Using --incremental is recommended only for
108 wizards or with --objects as in most other cases it will result in an empty
109 pack being output. The combination of --loose --objects --incremental will
110 pack up all loose objects not already in a pack (and nothing else if standard
111 input is redirected to /dev/null in which case the --objects is optional).
113 WARNING: the move_aside logic currently only works when pack-base-name is
114 completely omitted!
117 set -e
119 # $$ should be the same in subshells, but just in case, remember it
120 cp_pid=$$
# perlprog holds a Perl program that is later run as: perl -e "$perlprog"
# with the output of "git cat-file --batch" on its standard input.
# For every "<hash> <type> <size>" header line it reads:
#   - objects whose type is not "tag" have their body (size + 1 bytes) discarded
#   - for tags, header lines are scanned up to the first empty line for the
#     "tag <name>" field and the number following "tagger ...>" (tagger time);
#     the remainder of the object is then discarded
# At end of input it prints one "<hash> <name>" line per tag, ordered by the
# captured tagger number in descending order.
# No comments are added inside the quoted program itself because that would
# change the string handed to perl.
122 perlprog='
123 #!/usr/bin/perl
124 #line 100 "combine-packs.sh"
125 use strict;
126 use warnings;
128 sub discard {
129 my $count = shift;
130 my $x = "";
131 while ($count >= 32768) {
132 read(STDIN, $x, 32768);
133 $count -= 32768;
135 read(STDIN, $x, $count) if $count;
138 my @tags = ();
139 binmode STDIN;
140 while (<STDIN>) {
141 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
142 my ($h, $t, $l) = ($1, $2, $3);
143 my $te = 0;
144 my $tn = "";
145 discard(1 + $l), next unless $2 eq "tag";
146 my $count = 0;
147 while (<STDIN>) {
148 $count += length($_);
149 chomp;
150 last if /^$/;
151 $tn = $1 if /^tag ([^ ]+)$/;
152 $te = $1 if /^tagger [^>]+> ([0-9]+)/;
153 last if $tn && $te;
155 discard(1 + $l - $count);
156 push(@tags, [$te, "$h $tn\n"]);
159 print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
# Probe the platform xargs: it is given empty input and the command "echo -r".
# If xargs runs the command despite the empty input, the captured output is
# "-r" and that text becomes the extra option; otherwise xargs_r stays empty.
162 # On some broken platforms running xargs without -r and empty input runs the command
163 xargs_r="$(: | command xargs echo -r)"
165 # Some platforms' broken xargs runs the command always at least once even if
166 # there's no input unless given a special option. Automatically supply the
167 # option on those platforms by providing an xargs function.
# $xargs_r is deliberately unquoted so that an empty value adds no argument.
168 xargs() { command xargs $xargs_r "$@"; }
# zap: suffix of the per-run marker files used by --replace (empty = inactive)
# gdo: absolute path of the objects directory (empty until it is validated)
# Both start empty so cleanup_on_exit is harmless if the script exits early.
171 zap=
173 gdo=
# EXIT trap handler.  Every step is best-effort ("|| :") so that cleanup
# itself can never abort under set -e.
#   1. the run counts as failed unless "$td/success" exists
#   2. the temporary directory "$td" is removed if it exists
#   3. any leftover "*.$zap" marker files in "$gdo/pack" are removed
#   4. on failure a notice is written to stderr, using the same
#      "combine-packs:" prefix as the other diagnostics of this script
175 cleanup_on_exit() {
176 ewf=
177 [ -n "$td" ] && [ -e "$td/success" ] || ewf=1
178 [ -z "$td" ] || ! [ -e "$td" ] || rm -rf "$td" || :
179 [ -z "$gdo" ] || [ -z "$zap" ] || command find "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -print0 | xargs -0 rm -f || :
180 [ -z "$ewf" ] || echo "combine-packs: exiting with failure" >&2 || :
# Run cleanup_on_exit whenever the shell exits.  The signal traps turn
# HUP/INT/QUIT/TERM into a plain "exit" so the EXIT trap still runs; the
# exit status is 128 + the conventional signal number (1, 2, 3 and 15).
183 trap cleanup_on_exit EXIT
184 trap 'exit 129' HUP
185 trap 'exit 130' INT
186 trap 'exit 131' QUIT
187 trap 'exit 143' TERM
# die <message words...>
# Print "combine-packs: fatal: <message>" to stderr, signal the main script
# process ($cp_pid) and, if "$td/popid" is non-empty, the process whose pid is
# recorded there, then exit with status 1.
189 die() {
190 echo "combine-packs: fatal: $*" >&2 || :
191 # In case we are in a sub shell force the entire command to exit
192 # The trap on TERM will make sure cleanup still happens in this case
193 extrapid=
194 [ -z "$td" ] || ! [ -s "$td/popid" ] || extrapid="$(cat "$td/popid")" || :
195 kill $cp_pid $extrapid || :
196 exit 1
# cmd_path <name>
# Print what "command -v <name>" reports and return its status.  The body
# runs in a subshell, so removing any functions named unalias, command or
# <name> and dropping all aliases does not affect the caller.  The builtin
# names are quoted so that alias expansion cannot apply to them.  All
# stderr output is discarded.
199 cmd_path() (
200 "unset" -f unalias command "$1" >/dev/null 2>&1 || :
201 "unalias" -a >/dev/null 2>&1 || :
202 "command" -v "$1"
203 ) 2>/dev/null
# do_command <cmd> [args...]
# Run <cmd> in a subshell with LC_ALL=C and return its exit status.  Because
# "exec" is used, <cmd> is looked up as an external utility, not as one of
# the shell functions defined in this script.
205 # This extra indirection shouldn't be necessary, but it is for some broken sh
206 # in order for a failure to not prematurely exit die_on_fail with set -e active
207 do_command() (
208 # some shells do not handle "exec command ..." properly but just a
209 # plain "exec ..." has the same semantics so "command" is omitted here
210 LC_ALL=C exec "$@"
# die_on_fail <cmd> [args...]
# Run <cmd> through do_command.  On a non-zero status, create the marker file
# "$td/failed" (when $td is set) and call die with the status and the full
# command line.
213 die_on_fail() {
214 do_command "$@" || {
215 _ec=$?
216 [ -z "$td" ] || >"$td/failed" || :
217 die "failed command ($_ec): $*"
# Each function below shadows the utility of the same name and routes the
# call through die_on_fail, so from here on a plain "awk", "cat", "git" etc.
# aborts the whole script when the utility exits with a non-zero status.
221 # These commands may be the non-final member of a pipe and
222 # MUST NOT be allowed to silently fail without consequence
223 awk() { die_on_fail awk "$@"; }
224 cat() { die_on_fail cat "$@"; }
225 cut() { die_on_fail cut "$@"; }
226 find() { die_on_fail find "$@"; }
227 git() { die_on_fail git "$@"; }
228 join() { die_on_fail join "$@"; }
229 perl() { die_on_fail perl "$@"; }
230 sed() { die_on_fail sed "$@"; }
231 sort() { die_on_fail sort "$@"; }
# Glob patterns for lowercase hex: octet matches 2 hex digits, octet4 8,
# octet19 38 and octet20 40 hex digits.
233 octet='[0-9a-f][0-9a-f]'
234 octet4="$octet$octet$octet$octet"
235 octet19="$octet4$octet4$octet4$octet4$octet$octet$octet"
236 octet20="$octet4$octet4$octet4$octet4$octet4"
# Option state; empty means "not requested".  Each is set by the option
# parsing loop: names (--names), ignoremiss (--ignore-missing), looselist
# (--loose), objectlist (--objects), dozap (--replace), envok (--envok),
# missok (--ignore-missing-objects), noperl (--sort-tags-by-id).
238 names=
239 ignoremiss=
240 looselist=
241 objectlist=
242 dozap=
243 envok=
244 missok=
245 noperl=
# Consume the leading combine-packs options.  Each recognised option sets its
# state variable and is shifted away; the loop stops at the first argument
# that is not one of them, so everything left in "$@" is later handed to
# git pack-objects.  --replace records a marker suffix "zap-<pid>" in dozap.
# -h/--help disables the EXIT trap, prints USAGE without its leading
# character (through the git pager when stdout is a terminal and a pager is
# configured) and exits 0.
# Afterwards --objects is rejected in combination with --ignore-missing or
# --replace.
247 while [ $# -ge 1 ]; do case "$1" in
248 --names)
249 names=1
250 shift
252 --replace)
253 dozap="zap-$$"
254 shift
256 --ignore-missing)
257 ignoremiss=1
258 shift
260 --ignore-missing-objects)
261 missok=1
262 shift
264 -h|--help)
265 trap - EXIT
266 if [ -t 1 ] && pg="$(git var GIT_PAGER 2>/dev/null)" && [ -n "$pg" ]; then
267 printf '%s' "${USAGE#?}" | eval "$pg" || :
268 else
269 printf '%s' "${USAGE#?}" || :
271 exit 0
273 --loose)
274 looselist=1
275 shift
277 --objects)
278 objectlist=1
279 shift
281 --envok)
282 envok=1
283 shift
285 --sort-tags-by-id)
286 noperl=1
287 shift
290 break
292 esac; done
293 [ -z "$ignoremiss$dozap" ] || [ -z "$objectlist" ] || die "invalid options"
# Establish the git environment:
#   - export GIT_NO_REPLACE_OBJECTS=1
#   - gd: the git directory as an absolute physical path; for Git 2.5 or
#     later it is replaced by the common git directory
#   - gvmaj/gvmin/gvpat: numeric components parsed from "git --version",
#     each defaulting to 0; versions older than 1.5.6 are rejected
#   - gcfbf: explicit --batch-check format, only for Git 1.8.5 or later
#   - gcfbo: "--buffer", only for Git 2.6 or later
#   - noperl is forced to 1 when no perl can be located with cmd_path
295 # Always make sure we get the specified objects
296 GIT_NO_REPLACE_OBJECTS=1
297 export GIT_NO_REPLACE_OBJECTS
298 gd="$(git rev-parse --git-dir)" && [ -n "$gd" ] ||
299 die "git rev-parse --git-dir failed"
300 gv="$(git --version)"
301 gv="${gv#[Gg]it version }"
302 gv="${gv%%[!0-9.]*}"
303 IFS=. read -r gvmaj gvmin gvpat <<EOT
306 : "${gvmaj:=0}" "${gvmin:=0}" "${gvpat:=0}"
307 # git rev-parse added --no-walk support in 1.5.3 which is required
308 # git cat-file added --batch-check support in 1.5.6 which is required
309 if [ $gvmaj -lt 1 ] || { [ $gvmaj -eq 1 ] && [ $gvmin -lt 5 ]; } ||
310 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 5 ] && [ $gvpat -lt 6 ]; }; then
311 die "combine-packs requires at least Git version 1.5.6"
313 tmp="$gd"
314 gd="$(cd "$gd" && pwd -P)" || die "cd failed: $tmp"
315 # git rev-parse added --git-common-dir in 2.5
316 if [ $gvmaj -gt 2 ] || { [ $gvmaj -eq 2 ] && [ $gvmin -ge 5 ]; }; then
317 # rev-parse --git-common-dir is broken and may give an
318 # incorrect result without a suitable current directory
319 tmp="$gd"
320 gd="$(cd "$gd" && cd "$(git rev-parse --git-common-dir)" && pwd -P)" &&
321 [ -n "$gd" ] ||
322 die "git rev-parse --git-common-dir failed from: $tmp"
324 # gcfbf is Git Cat-File --Batch-check=Format Option :)
325 gcfbf=
326 if [ $gvmaj -gt 1 ] || { [ $gvmaj -eq 1 ] && [ $gvmin -gt 8 ]; } ||
327 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 8 ] && [ $gvpat -ge 5 ]; }; then
328 gcfbf='=%(objectname) %(objecttype)'
330 # gcfbo is Git Cat-File --Buffer Option :)
331 gcfbo=
332 if [ $gvmaj -gt 2 ] || { [ $gvmaj -eq 2 ] && [ $gvmin -ge 6 ]; }; then
333 gcfbo=--buffer
335 [ -n "$noperl" ] || perlbin="$(cmd_path perl)" && [ -n "$perlbin" ] || noperl=1
# Validate GIT_OBJECT_DIRECTORY and determine the objects directory.
# Without --envok a set GIT_OBJECT_DIRECTORY is accepted only when it is an
# existing directory whose physical path equals that of "$gd/objects";
# anything else is fatal.
# gdo becomes the absolute physical path of the objects directory and must
# contain a "pack" subdirectory.  zap is only assigned (from dozap) after
# gdo has been validated.
336 if [ "${GIT_OBJECT_DIRECTORY+set}" = "set" ] && [ -z "$envok" ]; then
337 # GIT_OBJECT_DIRECTORY may only be set to $gd/objects without --envok
338 godok=
339 if [ -n "$GIT_OBJECT_DIRECTORY" ] && [ -d "$GIT_OBJECT_DIRECTORY" ] &&
340 [ -d "$gd/objects" ] && godfp="$(cd "$GIT_OBJECT_DIRECTORY" && pwd -P)" &&
341 gdofp="$(cd "$gd/objects" && pwd -P)" && [ -n "$godfp" ] && [ -n "$gdofp" ] &&
342 [ "$gdofp" = "$godfp" ]; then
343 godok=1
345 if [ -z "$godok" ]; then
346 die "GIT_OBJECT_DIRECTORY set to non-default location without --envok"
349 gdo="${GIT_OBJECT_DIRECTORY:-$gd/objects}"
350 tmp="$gdo"
351 gdo="$(cd "$gdo" && pwd -P)" || die "cd failed: $tmp"
352 [ -d "$gdo/pack" ] || die "no such directory: $gdo/pack"
353 zap="$dozap"
# Scan the remaining arguments (those destined for git pack-objects).
#   - combine-packs options appearing here are too late: "invalid options"
#   - pack-objects options that would change where the object list comes
#     from or where the pack goes are refused: "forbidden pack-objects
#     options".  --stdout is part of this list because the usage text
#     declares it forbidden.
#   - nonopts counts the arguments that are not options; lastarg holds the
#     final argument and lastargopt is non-empty when that argument was
#     classified as an option
# At most one non-option argument (the pack base name) is accepted; it must
# be the last argument and must not be empty.
355 lastarg=
356 lastargopt=
357 packbase=
358 packbasearg=
359 nonopts=0
360 for arg; do
361 lastarg="$arg"
362 lastargopt=1
363 case "$arg" in
364 --replace|--names|--ignore-missing|-h|--help|--objects)
365 die "invalid options"
367 --revs|--unpacked|--all|--reflog|--indexed-objects|--stdout)
368 die "forbidden pack-objects options"
374 lastargopt=
375 nonopts=$(( $nonopts + 1 ))
376 esac
377 done
378 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] ||
379 { [ $nonopts -eq 1 ] && [ -n "$lastargopt" ]; } ||
380 { [ $nonopts -eq 1 ] && [ -z "$lastarg" ]; }; then
381 die "invalid options"
# Determine the output pack base name.
#   - packbase: the explicit final argument, or "$gdo/pack/pack" by default;
#     its directory must exist.  It is canonicalised with cd/pwd -P and then
#     made relative to $gd when it lies below $gd.
#   - packbasecheck: the canonical absolute form, kept for the --replace test
#   - packbasearg: set only when NO pack base was given on the command line
# --replace requires the default pack base, located directly in "$gdo/pack".
# NOTE(review): a relative pack base is resolved here before the script
# changes into $gd, i.e. against the invoking directory -- confirm this
# matches the documented "relative to the current git directory" behaviour.
383 if [ $nonopts -eq 1 ]; then
384 packbase="$lastarg"
385 else
386 packbase="$gdo/pack/pack"
388 pbd="$(dirname "$packbase")"
389 [ -e "$pbd" ] && [ -d "$pbd" ] || die "no such directory: $packbase"
390 packbase="$(cd "$(dirname "$packbase")" && pwd -P)/$(basename "$packbase")"
391 pbd="$(dirname "$packbase")"
392 [ -e "$pbd" ] && [ -d "$pbd" ] || die "internal failure realpathing: $packbase"
393 packbasecheck="$packbase"
394 case "$packbase" in "$gd"/?*)
395 packbase="${packbase#$gd/}"
396 esac
397 [ $nonopts -eq 1 ] || packbasearg="$packbase"
398 [ -z "$zap" ] || [ -n "$packbasearg" ] || die "--replace does not allow specifying pack-base"
399 if [ -n "$zap" ] && [ "$(dirname "$packbasecheck")" != "$gdo/pack" ] ; then
400 die "--replace and pack base dir not <git-dir-objects>/pack" >&2
# Create the private temporary directory inside $gd and name the files kept
# in it.  td is the full path; tdmin is only its final component.
# failed/listok/packok/popid/success are full paths: status markers and the
# pid file read by die.
# cm, cmo, tg, tr, bl, ms, trbl, named and named2 are built from tdmin, so
# they are relative paths (the work lists for commits, ordered commits,
# tags, trees, blobs, missing objects, trees+blobs and named objects).
403 td="$(mktemp -d "$gd/cmbnpcks-XXXXXX")"
404 tdmin="$(basename "$td")"
405 failed="$td/failed"
406 listok="$td/listok"
407 packok="$td/packok"
408 popid="$td/popid"
409 success="$td/success"
410 cm="$tdmin/commits"
411 cmo="$tdmin/ordered"
412 tg="$tdmin/tags"
413 tr="$tdmin/trees"
414 bl="$tdmin/blobs"
415 ms="$tdmin/missing"
416 trbl="$tdmin/treesblobs"
417 named="$tdmin/named"
418 named2="$tdmin/named2"
# get_pack_base <pack-name>
# Resolve one input pack name and print its base path (the path without the
# ".idx"/".pack" suffix) on stdout.
#   - a bare 40 hex digit name means "$gdo/pack/pack-<name>"
#   - a trailing ".idx" or ".pack" is stripped
#   - if neither <name>.idx nor <name>.pack exists and the name contains no
#     "/", it is retried as "$gdo/pack/<name>"
# Both the .idx and the .pack must be non-empty regular files; otherwise the
# function prints nothing and returns 0 when --ignore-missing is active, and
# dies when it is not.
# The directory part is canonicalised with cd/pwd -P, and the printed result
# is made relative to $gd when the pack lies below $gd.  With --replace the
# pack must be located directly in "$gdo/pack".
420 get_pack_base() {
421 _name="$1"
422 case "$_name" in
423 $octet20)
424 _name="$gdo/pack/pack-$_name"
426 *.idx)
427 _name="${_name%.idx}"
429 *.pack)
430 _name="${_name%.pack}"
432 esac
433 if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
434 case "$_name" in */*) :;; *)
435 _name="$gdo/pack/$_name"
436 esac
438 if ! [ -f "$_name.idx" ] || ! [ -s "$_name.idx" ] ||
439 ! [ -f "$_name.pack" ] || ! [ -s "$_name.pack" ]; then
440 [ -z "$ignoremiss" ] || return 0
441 die "no such pack found matching: $1" >&2
443 _name="$(cd "$(dirname "$_name")" && pwd -P)/$(basename "$_name")"
444 if ! [ -f "$_name.idx" ] || ! [ -s "$_name.idx" ] ||
445 ! [ -f "$_name.pack" ] || ! [ -s "$_name.pack" ]; then
446 die "internal failure realpathing: $1" >&2
448 _namecheck="$_name"
449 case "$(dirname "$_name")" in "$gd"/?*)
450 _name="${_name#$gd/}"
451 esac
452 if [ -n "$zap" ] && [ "$(dirname "$_namecheck")" != "$gdo/pack" ]; then
453 die "--replace and pack not in <git-dir-objects>/pack: $1" >&2
455 echo "$_name"
456 return 0
# move_aside <file>...
# Two passes over the arguments: the first hard-links every existing regular
# file to "old<basename>" in the same directory (replacing any previous
# link); the second removes the original names.  Arguments that are not
# existing regular files are skipped.  Always returns 0.
# NOTE(review): a command negated with "!" does not trigger "set -e", so the
# "! test -f" after the rm looks advisory only -- confirm the intent.
459 # add "old" prefix to passed in existing files, but be careful to hard-link
460 # ALL the files to be renamed to the renamed name BEFORE removing anything
461 move_aside() {
462 for _f; do
463 ! [ -f "$_f" ] ||
464 ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")"
465 done
466 for _f; do
467 if [ -f "$_f" ]; then
468 rm -f "$_f"
469 ! test -f "$_f"
471 done
472 return 0
# list_loose_objects
# Print the 40 hex digit id of every loose object below $gdo, one per line.
# Runs in a subshell, so the cd does not affect the caller.
#   - the unquoted $octet glob expands to the two hex digit subdirectories;
#     when nothing matches, echo returns the pattern itself and the function
#     returns 0 without output
#   - find lists regular files with 38 hex digit names directly inside those
#     directories, and sed removes the first "/" of each path, joining the
#     directory name and the file name into the object id
475 list_loose_objects() (
476 cd "$gdo" || return 1
477 objdirs="$(echo $octet)"
478 [ "$objdirs" != "$octet" ] || return 0
479 find $objdirs -mindepth 1 -maxdepth 1 -type f -name "$octet19" -print | sed 's,/,,'
# Collect every input object id and sort the ids into per-type work files.
# The invoking directory is remembered in origdir and the script changes into
# $gd, which is what makes the relative $tdmin/... file names valid.
# Source of the ids:
#   - with --objects: standard input, preceded by the loose object list when
#     --loose is given
#   - otherwise: stale "*.$zap" markers are removed first; then the loose
#     object list (with --loose) plus, for every pack named on standard input
#     (resolved by get_pack_base from origdir), the ids printed by
#     "git show-index".  With --replace an empty "<pack>.$zap" marker is
#     created for each input pack that has no "<pack>.keep" file.
# All ids go through "git cat-file --batch-check"; awk then appends blob,
# commit, tag and missing ids to $bl, $cm, $tg and $ms, while tree ids are
# passed on through "sort -u" into $tr.
482 origdir="$PWD"
483 cd "$gd"
484 >"$cm"
485 >"$cmo"
486 >"$tr"
487 >"$bl"
488 if [ -n "$objectlist" ]; then
489 gcf='git cat-file $gcfbo --batch-check"$gcfbf"'
490 [ -z "$looselist" ] || gcf='{ list_loose_objects && cat; } | '"$gcf"
491 eval "$gcf"
492 else
493 [ -z "$zap" ] || find "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -print0 | xargs -0 rm -f || :
495 [ -z "$looselist" ] || list_loose_objects
496 while IFS=': ' read -r packraw junk; do
497 pack="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack: $packraw")"
498 if [ -n "$pack" ]; then
499 [ -z "$zap" ] || [ -e "$pack.keep" ] || >"$pack.$zap"
500 git show-index <"$pack.idx"
502 done | cut -d ' ' -f 2
503 } | git cat-file $gcfbo --batch-check"$gcfbf"
504 fi | awk '{
505 if ($2=="tree") print $1
506 else if ($2=="blob") print $1 >"'"$bl"'"
507 else if ($2=="commit") print $1 >"'"$cm"'"
508 else if ($2=="tag") print $1 >"'"$tg"'"
509 else if ($2=="missing") print $1 >"'"$ms"'"
510 }' | sort -u >"$tr"
# Fail on missing objects unless --ignore-missing-objects was given, then
# attach path names to the trees and blobs that are to be packed.
#   - $trbl: sorted unique union of the tree and blob ids plus one extra line
#     "g" (it sorts after every hex id; presumably there to keep the file
#     non-empty -- TODO confirm)
#   - "git rev-list --no-walk --objects" over the commits in $cm: lines that
#     consist of a single field go to $cmo unchanged; all other lines are
#     prefixed with their line number
#   - the numbered lines are sorted by object id and joined against $trbl, so
#     $named only keeps objects from the input set, with the id moved to the
#     first field
#   - $named2: the same procedure for the trees in $tr that did not appear
#     in $named
511 [ -n "$missok" ] || ! [ -s "$ms" ] || die "missing" $(wc -l <"$ms") "object(s)"
512 echo "g" | cat "$tr" "$bl" - | sort -u >"$trbl"
513 git rev-list --no-walk --objects --stdin <"$cm" |
514 awk '{
515 if ($1!=$0) print NR " " $0
516 else print $0 >"'"$cmo"'"
517 }' |
518 sort -t " " -k2,2 |
519 join -t " " -1 2 - "$trbl" >"$named"
520 join -t " " -v 1 "$tr" "$named" |
521 git rev-list --no-walk --objects --stdin |
522 awk '{print NR " " $0}' |
523 sort -t " " -k2,2 |
524 join -t " " -1 2 - "$trbl" >"$named2"
# Produce the object list in packing order, feed it to git pack-objects and
# install the resulting pack(s).
# pocmd is the pack-objects command line; when no pack base was given on the
# command line, the default base with a "tmp" suffix is appended to it.
# Order of the generated list:
#   1. the commit lines saved in $cmo
#   2. tags, if any: sorted by id (noperl) or in the order printed by the
#      embedded Perl program
#   3. named trees (entries of $named and $named2 that are in $tr), ordered
#      by their recorded line number, with that number stripped by sed
#   4. the remaining named entries: awk builds a sort key from the last 16
#      non-whitespace characters of the name, reversed; the lines are sorted
#      by that key and then by line number, and printed as "<id> <name>"
#   5. every blob id in $bl
# $listok is created once the whole list has been written.
# pack-objects is started through "sh -c" so that its pid can be stored in
# $popid before the exec; $packok is created after it succeeds.
# Each pack name printed by pack-objects is then read: when the default pack
# base is in use, existing files of that name are moved aside, the "tmp"
# .pack and .idx are hard-linked to their final names and the tmp files are
# removed.  With --names the pack name is echoed to stdout.
525 pocmd='git pack-objects --delta-base-offset "$@"'
526 [ -z "$packbasearg" ] || pocmd="$pocmd \"${packbasearg}tmp\""
528 cat "$cmo"
529 ! [ -s "$tg" ] || {
530 if [ -n "$noperl" ]; then
531 sort -u "$tg"
532 else
533 git cat-file $gcfbo --batch <"$tg" | perl -e "$perlprog"
537 join -t " " "$named" "$tr" |
538 sort -t " " -k2,2n
539 join -t " " "$named2" "$tr" |
540 sort -t " " -k2,2n
541 } | sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
543 join -t " " -v 1 "$named" "$tr" |
544 sort -t " " -k2,2n
545 join -t " " -v 1 "$named2" "$tr" |
546 sort -t " " -k2,2n
547 } | awk -F '[ ]' '{
548 if (NF >= 3) {
549 nm = substr($0, length($1) + length($2) + 3)
550 sfx = nm
551 gsub(/[\t\n\013\f\r ]+/, "", sfx)
552 if (length(sfx)) {
553 if (length(sfx) > 16) sfx = substr(sfx, length(sfx) - 15)
554 else if (length(sfx) < 16) sfx = sprintf("%16s", sfx)
555 split(sfx, c, "")
556 r = c[16] c[15] c[14] c[13] c[12] c[11] c[10] c[9] c[8] c[7] c[6] c[5] c[4] c[3] c[2] c[1]
557 sub(/[ ]+$/, "", r)
558 print NR " " $1 " " r " " nm
559 } else print NR " " $1 " " nm
560 } else print NR " " $1 " "
561 }' | sort -t " " -k3,3 -k1,1n | awk -F '[ ]' '{
562 if (NF >= 4) {
563 nm = substr($0, length($1) + length($2) + length($3) + 4)
564 print $2 " " nm
565 } else print $2 " "
567 sort -u "$bl"
568 >"$listok"
569 } | {
570 sh -c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh "$popid" "$pocmd" "$@" || {
571 rm -f "$popid"
572 die "git pack-objects failed"
574 rm -f "$popid"
575 >"$packok"
577 while read -r newpack; do
578 if [ -n "$packbasearg" ]; then
579 move_aside "$packbasearg"-$newpack.*
580 ln -f "${packbasearg}tmp"-$newpack.pack "$packbasearg"-$newpack.pack
581 ln -f "${packbasearg}tmp"-$newpack.idx "$packbasearg"-$newpack.idx
582 rm -f "${packbasearg}tmp"-$newpack.*
584 [ -z "$names" ] || echo "$newpack"
585 done
# Final verification and --replace handling.
# The run only counts as good when the preceding pipeline returned 0, no
# "failed" marker exists and both the "listok" and "packok" markers exist.
# With --replace, every "*.$zap" marker in "$gdo/pack" identifies an input
# pack to delete: all files sharing the marker's base name are removed,
# which includes the marker itself.
# Creating "$success" is what tells cleanup_on_exit that the run succeeded.
586 [ $? -eq 0 ] && ! [ -e "$failed" ] && [ -e "$listok" ] && [ -e "$packok" ] ||
587 die "unspecified failure"
588 if [ -n "$zap" ]; then
589 find "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -print |
590 while read -r remove; do
591 rm -f "${remove%.$zap}".*
592 done
594 >"$success"