jobd.sh: run jobd.pl more efficiently
[girocco.git] / jobd / combine-packs.sh
blobe1ab420e91ef484d4ab9ca1568d658257e996a2d
1 #!/bin/sh
3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016,2017 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Version 1.1.22
21 USAGE="
22 printf '%s\n' path-to-pack[.idx|.pack] ... |
23 $(basename "$0") [option]... [pack-objects option]... [pack-base-name]
25 NOTE: The following options MUST be given before any pack-objects options:
27 --replace on success, remove the input packs, see note below
28 (but any input packs with a .keep are never removed)
30 --names output the 40-char hex sha1 plus '\n' to stdout for each
31 newly created pack(s), if any
33 --ignore-missing silently ignore input pack file names that do not exist
35 --ignore-missing-objects
36 silently ignore missing objects (explicit objects when
37 using --objects otherwise those contained in input packs)
39 --loose add the list of all currently existing loose objects in
40 the repository to the list of objects to pack
42 --objects input is a list of object hash id values instead of packs
44 --envok allow use of GIT_OBJECT_DIRECTORY otherwise it is an error
45 to run combine-packs.sh with GIT_OBJECT_DIRECTORY set
47 --sort-tags-by-id
48 sort tags by object id rather than embedded tag name
49 using this option avoids using perl when tags are present
51 If --replace is given, ALL packs to be combined MUST be located in
52 the objects/pack subdirectory of the current git directory AND the output
53 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
55 The --loose option can be used both with and without the --objects option. If
56 there are no currently existing loose objects in the repository's objects/
57 directory then it's effectively silently ignored.
59 Note that if --objects is used then --replace and --ignore-missing are invalid.
61 Unless --ignore-missing-objects is given, any input objects (either given
62 explicitly when using --objects otherwise those contained in the input packs)
63 that are not present in the current git directory (respecting the value of
64 GIT_OBJECT_DIRECTORY if --envok is given) or its alternate object
65 directories, if any, will cause combine-packs to fail.
66 With this option any such objects are SILENTLY SKIPPED and do NOT appear in
67 the output pack(s)!
69 Unless the --sort-tags-by-id option is used then perl will be used if available
70 and any tag objects are present in the input. It provides the only efficient
71 way to extract the embedded tag name from a batch of tag objects reliably.
72 However, since the only reason the tag name is extracted is to sort the tag
73 objects for better tag deltification, if the tag objects are sorted by the
74 tag object id there is never any need to run perl. In practice, tag objects
75 rarely generate deltas and there are almost never enough tag objects in the
76 first place for the size savings of the almost-never-happens tag
77 deltification to matter anyway. This option will be activated automatically
78 if perl does not appear to be available.
80 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
81 the current git directory (as output by \`git rev-parse --git-dir\` or
82 by \`git rev-parse --git-common-dir\` for Git version 2.5 or later).
84 If a <pack-name> does not exist and contains no '/' characters then it is
85 retried as objects/pack/<pack-name> instead.
87 Packs to be combined MUST have an associated .idx file.
89 The pack-base-name may be a relative path name and if so, is ALWAYS relative
90 to the current git directory regardless of any GIT_OBJECT_DIRECTORY setting.
92 If not given, then the pack-base-name defaults to objects/pack/pack
93 relative to the current git directory.
95 If GIT_OBJECT_DIRECTORY is set to a non-default location (and the --envok flag
96 is given to allow it) then everywhere above where it says \"objects/\" is
97 effectively replaced with the full absolute path to \"\$GIT_OBJECT_DIRECTORY/\".
98 And, obviously, that location is no longer necessarily a subdirectory of the
99 current git directory either.
101 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
102 the ONLY option that is automatically passed (but remember that --reuse-delta
103 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
105 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
106 --stdout are forbidden. Although --keep-true-parents is allowed it should
107 not have any effect at all. Using --incremental is recommended only for
108 wizards or with --objects as in most other cases it will result in an empty
109 pack being output. The combination of --loose --objects --incremental will
110 pack up all loose objects not already in a pack (and nothing else if standard
111 input is redirected to /dev/null in which case the --objects is optional).
113 WARNING: the move_aside logic currently only works when pack-base-name is
114 completely omitted!
# Abort on the first command failure that is not explicitly handled.
117 set -e
119 # $$ should be the same in subshells, but just in case, remember it
# (die() sends a signal to this pid so that a failure detected inside a
# subshell still stops the whole script)
120 cp_pid=$$
# perlprog holds the Perl script that is later run as `perl -e "$perlprog"`
# on the output of `git cat-file --batch` for the collected tag object ids.
# For each "<hash> <type> <size>" header line it skips the body of any
# object that is not a tag.  For a tag it scans the header lines, up to the
# first empty line, for the "tag <name>" and "tagger ... <epoch>" fields and
# then skips the remainder of the object.  When input is exhausted it prints
# one "<hash> <tagname>" line per tag ordered by descending tagger timestamp
# (a tag without a tagger line is ordered with timestamp 0).
# NOTE: the script body is a single-quoted shell string, so nothing added to
# it may contain a single quote.
122 perlprog='
123 #!/usr/bin/perl
124 #line 100 "combine-packs.sh"
125 use strict;
126 use warnings;
128 sub discard {
129 my $count = shift;
130 my $x = "";
131 while ($count >= 32768) {
132 read(STDIN, $x, 32768);
133 $count -= 32768;
135 read(STDIN, $x, $count) if $count;
138 my @tags = ();
139 binmode STDIN;
140 while (<STDIN>) {
141 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
142 my ($h, $t, $l) = ($1, $2, $3);
143 my $te = 0;
144 my $tn = "";
145 discard(1 + $l), next unless $2 eq "tag";
146 my $count = 0;
147 while (<STDIN>) {
148 $count += length($_);
149 chomp;
150 last if /^$/;
151 $tn = $1 if /^tag ([^ ]+)$/;
152 $te = $1 if /^tagger [^>]+> ([0-9]+)/;
153 last if $tn && $te;
155 discard(1 + $l - $count);
156 push(@tags, [$te, "$h $tn\n"]);
159 print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
# Suffix of the "*.zap-<pid>" marker files (empty unless --replace is active)
zap=

# Absolute path of the objects directory (empty until it has been resolved)
gdo=

# cleanup_on_exit
# EXIT trap handler.  Removes the temporary work directory "$td" and any
# leftover "*.$zap" marker files in "$gdo/pack", and reports a failure on
# stderr unless the "$td/success" marker was present on entry.  Every
# cleanup step is best effort and never fails the handler.
cleanup_on_exit() {
	# assume failure until the success marker proves otherwise
	ewf=1
	if [ -n "$td" ] && [ -e "$td/success" ]; then
		ewf=
	fi
	if [ -n "$td" ] && [ -e "$td" ]; then
		rm -rf "$td" || :
	fi
	if [ -n "$gdo" ] && [ -n "$zap" ]; then
		command find -L "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -exec rm -f '{}' + || :
	fi
	if [ -n "$ewf" ]; then
		echo "combine_packs: exiting with failure" >&2 || :
	fi
}
# Run cleanup_on_exit whenever the shell exits, and turn the usual
# termination signals into a plain exit (status 128 + signal number) so
# that the EXIT trap still performs the cleanup.
175 trap cleanup_on_exit EXIT
176 trap 'exit 129' HUP
177 trap 'exit 130' INT
178 trap 'exit 131' QUIT
179 trap 'exit 143' TERM
# die <message...>
# Print "combine-packs: fatal: <message>" on stderr, signal the main script
# process (and the pack-objects helper whose pid is recorded in "$td/popid",
# if any), then exit with status 1.
die() {
	echo "combine-packs: fatal: $*" >&2 || :
	# We may be running inside a subshell, so signal the top-level process
	# to force the entire command to exit; its TERM trap makes sure the
	# cleanup still happens in that case.
	extrapid=
	if [ -n "$td" ] && [ -s "$td/popid" ]; then
		extrapid="$(cat "$td/popid")" || :
	fi
	kill $cp_pid $extrapid || :
	exit 1
}
# cmd_path <name>
# Print what `command -v` reports for <name>.  The lookup runs in a subshell
# after removing any shell function named unalias, command or <name> and
# after dropping all aliases; the utility names are quoted so that they are
# not themselves subject to alias expansion.  All diagnostics are discarded
# and the exit status is that of `command -v`.
191 cmd_path() (
192 "unset" -f unalias command "$1" >/dev/null 2>&1 || :
193 "unalias" -a >/dev/null 2>&1 || :
194 "command" -v "$1"
195 ) 2>/dev/null
197 # This extra indirection shouldn't be necessary, but it is for some broken sh
198 # in order for a failure to not prematurely exit die_on_fail with set -e active
# do_command <cmd> [arg...]
# Run <cmd> with LC_ALL=C inside a subshell; the subshell is replaced by
# <cmd> through exec, so the function's status is the status of <cmd>.
199 do_command() (
200 # some shells do not handle "exec command ..." properly but just a
201 # plain "exec ..." has the same semantics so "command" is omitted here
202 LC_ALL=C exec "$@"
# die_on_fail <cmd> [arg...]
# Run <cmd> through do_command.  On a non-zero status create the
# "$td/failed" marker (when $td is set) and abort the whole script through
# die, reporting the exit code and the command line.
205 die_on_fail() {
206 do_command "$@" || {
207 _ec=$?
208 [ -z "$td" ] || >"$td/failed" || :
209 die "failed command ($_ec): $*"
213 # These commands may be the non-final member of a pipe and
214 # MUST NOT be allowed to silently fail without consequence
# Each function below shadows the external utility of the same name and
# routes it through die_on_fail; do_command reaches the real program via
# exec, which does not run shell functions.
215 awk() { die_on_fail awk "$@"; }
216 cat() { die_on_fail cat "$@"; }
217 cut() { die_on_fail cut "$@"; }
218 find() { die_on_fail find "$@"; }
219 git() { die_on_fail git "$@"; }
220 join() { die_on_fail join "$@"; }
221 perl() { die_on_fail perl "$@"; }
222 sed() { die_on_fail sed "$@"; }
223 sort() { die_on_fail sort "$@"; }
# Glob patterns: one lowercase hex octet, and runs of 4, 19 and 20 octets.
# 20 octets is a full 40-char sha1; 19 octets is what list_loose_objects
# matches as a loose object file name.
225 octet='[0-9a-f][0-9a-f]'
226 octet4="$octet$octet$octet$octet"
227 octet19="$octet4$octet4$octet4$octet4$octet$octet$octet"
228 octet20="$octet4$octet4$octet4$octet4$octet4"
# Option flags; empty means "not given".
230 names=
231 ignoremiss=
232 looselist=
233 objectlist=
234 dozap=
235 envok=
236 missok=
237 noperl=
# Consume the leading combine-packs options.  Parsing stops at the first
# argument that is not one of them; everything from there on stays in "$@".
# --replace only records the marker suffix "zap-<pid>" in dozap at this point.
# -h/--help disables the EXIT trap, prints USAGE (through the pager reported
# by `git var GIT_PAGER` when stdout is a terminal) and exits 0.
239 while [ $# -ge 1 ]; do case "$1" in
240 --names)
241 names=1
242 shift
244 --replace)
245 dozap="zap-$$"
246 shift
248 --ignore-missing)
249 ignoremiss=1
250 shift
252 --ignore-missing-objects)
253 missok=1
254 shift
256 -h|--help)
257 trap - EXIT
258 if [ -t 1 ] && pg="$(git var GIT_PAGER 2>/dev/null)" && [ -n "$pg" ]; then
259 printf '%s' "${USAGE#?}" | eval "$pg" || :
260 else
261 printf '%s' "${USAGE#?}" || :
263 exit 0
265 --loose)
266 looselist=1
267 shift
269 --objects)
270 objectlist=1
271 shift
273 --envok)
274 envok=1
275 shift
277 --sort-tags-by-id)
278 noperl=1
279 shift
282 break
284 esac; done
# --objects cannot be combined with --ignore-missing or --replace.
285 [ -z "$ignoremiss$dozap" ] || [ -z "$objectlist" ] || die "invalid options"
287 # Always make sure we get the specified objects
288 GIT_NO_REPLACE_OBJECTS=1
289 export GIT_NO_REPLACE_OBJECTS
# Locate the git directory.
290 gd="$(git rev-parse --git-dir)" && [ -n "$gd" ] ||
291 die "git rev-parse --git-dir failed"
# Reduce the `git --version` output to its leading dotted number and split it
# into gvmaj/gvmin/gvpat (each defaulting to 0 when absent).
292 gv="$(git --version)"
293 gv="${gv#[Gg]it version }"
294 gv="${gv%%[!0-9.]*}"
295 IFS=. read -r gvmaj gvmin gvpat <<EOT
298 : "${gvmaj:=0}" "${gvmin:=0}" "${gvpat:=0}"
299 # git rev-parse added --no-walk support in 1.5.3 which is required
300 # git cat-file added --batch-check support in 1.5.6 which is required
301 if [ $gvmaj -lt 1 ] || { [ $gvmaj -eq 1 ] && [ $gvmin -lt 5 ]; } ||
302 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 5 ] && [ $gvpat -lt 6 ]; }; then
303 die "combine-packs requires at least Git version 1.5.6"
# Make gd an absolute, symlink-free path.
305 tmp="$gd"
306 gd="$(cd "$gd" && pwd -P)" || die "cd failed: $tmp"
307 # git rev-parse added --git-common-dir in 2.5
308 if [ $gvmaj -gt 2 ] || { [ $gvmaj -eq 2 ] && [ $gvmin -ge 5 ]; }; then
309 # rev-parse --git-common-dir is broken and may give an
310 # incorrect result without a suitable current directory
311 tmp="$gd"
312 gd="$(cd "$gd" && cd "$(git rev-parse --git-common-dir)" && pwd -P)" &&
313 [ -n "$gd" ] ||
314 die "git rev-parse --git-common-dir failed from: $tmp"
316 # gcfbf is Git Cat-File --Batch-check=Format Option :)
# (only set for Git 1.8.5 or later, per the version test below)
317 gcfbf=
318 if [ $gvmaj -gt 1 ] || { [ $gvmaj -eq 1 ] && [ $gvmin -gt 8 ]; } ||
319 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 8 ] && [ $gvpat -ge 5 ]; }; then
320 gcfbf='=%(objectname) %(objecttype)'
322 # gcfbo is Git Cat-File --Buffer Option :)
# (only set for Git 2.6 or later, per the version test below)
323 gcfbo=
324 if [ $gvmaj -gt 2 ] || { [ $gvmaj -eq 2 ] && [ $gvmin -ge 6 ]; }; then
325 gcfbo=--buffer
# Fall back to --sort-tags-by-id behavior (noperl=1) when no perl is found.
# The list groups as ((A || B) && C) || D, so noperl=1 is also (harmlessly)
# re-assigned when it was already set.
327 [ -n "$noperl" ] || perlbin="$(cmd_path perl)" && [ -n "$perlbin" ] || noperl=1
328 if [ "${GIT_OBJECT_DIRECTORY+set}" = "set" ] && [ -z "$envok" ]; then
329 # GIT_OBJECT_DIRECTORY may only be set to $gd/objects without --envok
330 godok=
331 if [ -n "$GIT_OBJECT_DIRECTORY" ] && [ -d "$GIT_OBJECT_DIRECTORY" ] &&
332 [ -d "$gd/objects" ] && godfp="$(cd "$GIT_OBJECT_DIRECTORY" && pwd -P)" &&
333 gdofp="$(cd "$gd/objects" && pwd -P)" && [ -n "$godfp" ] && [ -n "$gdofp" ] &&
334 [ "$gdofp" = "$godfp" ]; then
335 godok=1
337 if [ -z "$godok" ]; then
338 die "GIT_OBJECT_DIRECTORY set to non-default location without --envok"
# gdo is the absolute, symlink-free objects directory; it must contain pack/.
341 gdo="${GIT_OBJECT_DIRECTORY:-$gd/objects}"
342 tmp="$gdo"
343 gdo="$(cd "$gdo" && pwd -P)" || die "cd failed: $tmp"
344 [ -d "$gdo/pack" ] || die "no such directory: $gdo/pack"
# From here on the EXIT handler is allowed to remove "*.$zap" marker files.
345 zap="$dozap"
# Scan the remaining arguments (pack-objects options plus an optional
# trailing pack-base-name): reject combine-packs options that appear too
# late as well as forbidden pack-objects options, and count the arguments
# that are not options.
347 lastarg=
348 lastargopt=
349 packbase=
350 packbasearg=
351 nonopts=0
352 for arg; do
353 lastarg="$arg"
354 lastargopt=1
355 case "$arg" in
356 --replace|--names|--ignore-missing|-h|--help|--objects)
357 die "invalid options"
359 --revs|--unpacked|--all|--reflog|--indexed-objects)
360 die "forbidden pack-objects options"
366 lastargopt=
367 nonopts=$(( $nonopts + 1 ))
368 esac
369 done
# At most one non-option argument is accepted, and it must be the last
# argument and must not be empty.
370 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] ||
371 { [ $nonopts -eq 1 ] && [ -n "$lastargopt" ]; } ||
372 { [ $nonopts -eq 1 ] && [ -z "$lastarg" ]; }; then
373 die "invalid options"
375 if [ $nonopts -eq 1 ]; then
376 packbase="$lastarg"
377 else
378 packbase="$gdo/pack/pack"
# Canonicalize the directory part of packbase; it must already exist.
380 pbd="$(dirname "$packbase")"
381 [ -e "$pbd" ] && [ -d "$pbd" ] || die "no such directory: $packbase"
382 packbase="$(cd "$(dirname "$packbase")" && pwd -P)/$(basename "$packbase")"
383 pbd="$(dirname "$packbase")"
384 [ -e "$pbd" ] && [ -d "$pbd" ] || die "internal failure realpathing: $packbase"
385 packbasecheck="$packbase"
# Make packbase relative to $gd when it lies below it.
# NOTE(review): $gd is unquoted inside the ${packbase#...} expansion below and
# is therefore treated as a pattern; a git dir path containing glob
# characters would not be stripped as intended -- confirm and quote.
386 case "$packbase" in "$gd"/?*)
387 packbase="${packbase#$gd/}"
388 esac
# packbasearg is set only when NO pack-base-name was given on the command
# line (the default is in use); --replace requires exactly that case.
389 [ $nonopts -eq 1 ] || packbasearg="$packbase"
390 [ -z "$zap" ] || [ -n "$packbasearg" ] || die "--replace does not allow specifying pack-base"
391 if [ -n "$zap" ] && [ "$(dirname "$packbasecheck")" != "$gdo/pack" ] ; then
392 die "--replace and pack base dir not <git-dir-objects>/pack" >&2
# Create the private work directory inside the git directory; it is removed
# again by cleanup_on_exit.
395 td="$(mktemp -d "$gd/cmbnpcks-XXXXXX")"
396 tdmin="$(basename "$td")"
# Marker files, addressed by absolute path.
397 failed="$td/failed"
398 listok="$td/listok"
399 packok="$td/packok"
400 popid="$td/popid"
401 success="$td/success"
# Work files, addressed relative to $gd (the script changes into $gd before
# using them).
402 cm="$tdmin/commits"
403 cmo="$tdmin/ordered"
404 tg="$tdmin/tags"
405 tr="$tdmin/trees"
406 bl="$tdmin/blobs"
407 ms="$tdmin/missing"
408 trbl="$tdmin/treesblobs"
409 named="$tdmin/named"
410 named2="$tdmin/named2"
# get_pack_base <pack-spec>
# Resolve <pack-spec> to the base name (no .idx/.pack suffix) of an existing
# pack and print it on stdout.
#   <pack-spec> may be a 40-char hex sha1 (taken as $gdo/pack/pack-<sha1>),
#   or a path with or without a .idx or .pack suffix.  A name without any
#   "/" for which neither file exists is retried inside $gdo/pack.
# Both the .idx and the .pack file must exist and be non-empty; otherwise
# nothing is printed and 0 is returned when $ignoremiss is set, else die is
# called.  The printed name is canonical (pwd -P) and made relative to $gd
# when the pack lives below it.  With $zap set (--replace) the pack must be
# located directly in $gdo/pack.
# Reads globals: gd, gdo, octet20, ignoremiss, zap.
get_pack_base() {
	_name="$1"
	case "$_name" in
		$octet20)
			_name="$gdo/pack/pack-$_name"
			;;
		*.idx)
			_name="${_name%.idx}"
			;;
		*.pack)
			_name="${_name%.pack}"
			;;
	esac
	if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
		# a bare name that does not exist here is looked up in the pack dir
		case "$_name" in */*) :;; *)
			_name="$gdo/pack/$_name"
		esac
	fi
	if ! [ -f "$_name.idx" ] || ! [ -s "$_name.idx" ] ||
	   ! [ -f "$_name.pack" ] || ! [ -s "$_name.pack" ]; then
		[ -z "$ignoremiss" ] || return 0
		die "no such pack found matching: $1" >&2
	fi
	_name="$(cd "$(dirname "$_name")" && pwd -P)/$(basename "$_name")"
	if ! [ -f "$_name.idx" ] || ! [ -s "$_name.idx" ] ||
	   ! [ -f "$_name.pack" ] || ! [ -s "$_name.pack" ]; then
		die "internal failure realpathing: $1" >&2
	fi
	_namecheck="$_name"
	case "$(dirname "$_name")" in "$gd"/?*)
		# $gd must be quoted here so that it is removed as a literal prefix
		# and not interpreted as a glob pattern
		_name="${_name#"$gd"/}"
	esac
	if [ -n "$zap" ] && [ "$(dirname "$_namecheck")" != "$gdo/pack" ]; then
		die "--replace and pack not in <git-dir-objects>/pack: $1" >&2
	fi
	# printf rather than echo: the name must be emitted verbatim even when
	# it contains backslashes or starts with a dash
	printf '%s\n' "$_name"
	return 0
}
# move_aside <file>...
# Give every existing regular file among the arguments an "old" prefix on
# its base name.  Works in two passes so that ALL of the "old" hard links
# exist BEFORE any of the original names is removed.  Arguments that are not
# existing regular files are ignored.  Always returns 0.
move_aside() {
	# pass 1: create (or overwrite) the old<name> hard links
	for _f in "$@"; do
		if [ -f "$_f" ]; then
			ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")"
		fi
	done
	# pass 2: drop the original names
	for _f in "$@"; do
		[ -f "$_f" ] || continue
		rm -f "$_f"
		! test -f "$_f"
	done
	return 0
}
# list_loose_objects
# Print the full hex id, one per line, of every loose object file found in
# the two-hex-digit subdirectories of "$gdo".  Prints nothing (status 0)
# when no such subdirectory exists.  Runs in a subshell so the cd does not
# affect the caller.
list_loose_objects() (
	cd "$gdo" || return 1
	# unquoted on purpose: expands to the matching xx directories, or stays
	# the literal pattern when there are none
	objdirs="$(echo $octet)"
	if [ "$objdirs" = "$octet" ]; then
		return 0
	fi
	# "xx/yyyy..." -> "xxyyyy..."
	find -L $objdirs -mindepth 1 -maxdepth 1 -type f -name "$octet19" -print |
	sed 's,/,,'
)
# Main processing.  Remember the caller's directory (relative pack names on
# stdin are resolved against it), switch to the git directory and start with
# empty commit/ordered/tree/blob lists.
474 origdir="$PWD"
475 cd "$gd"
476 >"$cm"
477 >"$cmo"
478 >"$tr"
479 >"$bl"
# Stage 1: turn the input into "<id> <type>" lines with cat-file
# --batch-check and split them by type.  With --objects the ids come from
# stdin (preceded by the loose object ids when --loose is given); otherwise
# every input pack is resolved with get_pack_base, marked with an empty
# "<pack>.$zap" file when --replace is active and the pack has no .keep
# file, and its ids are taken from `git show-index`.  The awk script writes
# blob, commit, tag and missing ids to their own files; tree ids pass
# through and are stored sorted and unique in $tr.
480 if [ -n "$objectlist" ]; then
481 gcf='git cat-file $gcfbo --batch-check"$gcfbf"'
482 [ -z "$looselist" ] || gcf='{ list_loose_objects && cat; } | '"$gcf"
483 eval "$gcf"
484 else
485 [ -z "$zap" ] || command find -L "$gdo/pack" -maxdepth 1 -type f -name "*.$zap" -exec rm -f '{}' + || :
487 [ -z "$looselist" ] || list_loose_objects
488 while IFS=': ' read -r packraw junk; do
489 pack="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack: $packraw")"
490 if [ -n "$pack" ]; then
491 [ -z "$zap" ] || [ -e "$pack.keep" ] || >"$pack.$zap"
492 git show-index <"$pack.idx"
494 done | cut -d ' ' -f 2
495 } | git cat-file $gcfbo --batch-check"$gcfbf"
496 fi | awk '{
497 if ($2=="tree") print $1
498 else if ($2=="blob") print $1 >"'"$bl"'"
499 else if ($2=="commit") print $1 >"'"$cm"'"
500 else if ($2=="tag") print $1 >"'"$tg"'"
501 else if ($2=="missing") print $1 >"'"$ms"'"
502 }' | sort -u >"$tr"
# Missing objects are fatal unless --ignore-missing-objects was given.
503 [ -n "$missok" ] || ! [ -s "$ms" ] || die "missing" $(wc -l <"$ms") "object(s)"
# $trbl is the sorted union of the tree and blob ids plus a literal "g" line
# (which sorts after every hex id; presumably a sentinel -- TODO confirm why).
504 echo "g" | cat "$tr" "$bl" - | sort -u >"$trbl"
# Stage 2: list the objects reachable from the commits.  Output lines that
# consist of more than a bare id are numbered in traversal order and kept in
# $named as "<id> <seq> <name>" when the id is in $trbl; bare id lines are
# appended to $cmo.
505 git rev-list --no-walk --objects --stdin <"$cm" |
506 awk '{
507 if ($1!=$0) print NR " " $0
508 else print $0 >"'"$cmo"'"
509 }' |
510 sort -t " " -k2,2 |
511 join -t " " -1 2 - "$trbl" >"$named"
# Stage 3: the same for the trees that did not show up in $named; the result
# goes to $named2.
512 join -t " " -v 1 "$tr" "$named" |
513 git rev-list --no-walk --objects --stdin |
514 awk '{print NR " " $0}' |
515 sort -t " " -k2,2 |
516 join -t " " -1 2 - "$trbl" >"$named2"
# pack-objects command line; the "<base>tmp" output name is appended only
# when the default pack base is in use (packbasearg is set).
517 pocmd='git pack-objects --delta-base-offset "$@"'
518 [ -z "$packbasearg" ] || pocmd="$pocmd \"${packbasearg}tmp\""
# Stage 4: feed the object list to pack-objects in this order: the ids from
# $cmo, the tags (by id with noperl, otherwise as ordered by $perlprog), the
# named trees in traversal order, the remaining named entries, and finally
# every id from $bl.
520 cat "$cmo"
521 ! [ -s "$tg" ] || {
522 if [ -n "$noperl" ]; then
523 sort -u "$tg"
524 else
525 git cat-file $gcfbo --batch <"$tg" | perl -e "$perlprog"
# Named entries that are trees, by sequence number; sed drops the sequence
# number so that "<id> <name>" remains.
529 join -t " " "$named" "$tr" |
530 sort -t " " -k2,2n
531 join -t " " "$named2" "$tr" |
532 sort -t " " -k2,2n
533 } | sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
# Named entries that are not trees: the first awk builds a sort key from the
# last 16 non-whitespace characters of the name, reversed; the lines are
# sorted on that key (ties keep input order through the line number) and the
# second awk reduces them to "<id> <name>" again.  Presumably this groups
# similarly named files for better deltas -- TODO confirm.
535 join -t " " -v 1 "$named" "$tr" |
536 sort -t " " -k2,2n
537 join -t " " -v 1 "$named2" "$tr" |
538 sort -t " " -k2,2n
539 } | awk -F '[ ]' '{
540 if (NF >= 3) {
541 nm = substr($0, length($1) + length($2) + 3)
542 sfx = nm
543 gsub(/[\t\n\013\f\r ]+/, "", sfx)
544 if (length(sfx)) {
545 if (length(sfx) > 16) sfx = substr(sfx, length(sfx) - 15)
546 else if (length(sfx) < 16) sfx = sprintf("%16s", sfx)
547 split(sfx, c, "")
548 r = c[16] c[15] c[14] c[13] c[12] c[11] c[10] c[9] c[8] c[7] c[6] c[5] c[4] c[3] c[2] c[1]
549 sub(/[ ]+$/, "", r)
550 print NR " " $1 " " r " " nm
551 } else print NR " " $1 " " nm
552 } else print NR " " $1 " "
553 }' | sort -t " " -k3,3 -k1,1n | awk -F '[ ]' '{
554 if (NF >= 4) {
555 nm = substr($0, length($1) + length($2) + length($3) + 4)
556 print $2 " " nm
557 } else print $2 " "
559 sort -u "$bl"
# Reaching this point means the whole list was produced.
560 >"$listok"
561 } | {
# Consumer: a helper sh records its own pid in $popid (so die can signal it)
# and then execs the pack-objects command with the remaining arguments.
562 sh -c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh "$popid" "$pocmd" "$@" || {
563 rm -f "$popid"
564 die "git pack-objects failed"
566 rm -f "$popid"
567 >"$packok"
# Each line read here is the name of a newly written pack.  With the default
# pack base, existing files carrying the final name are moved aside and the
# "<base>tmp-<name>" pack and idx are hard-linked to their final names before
# the tmp files are removed.  With --names the name is echoed to stdout.
569 while read -r newpack; do
570 if [ -n "$packbasearg" ]; then
571 move_aside "$packbasearg"-$newpack.*
572 ln -f "${packbasearg}tmp"-$newpack.pack "$packbasearg"-$newpack.pack
573 ln -f "${packbasearg}tmp"-$newpack.idx "$packbasearg"-$newpack.idx
574 rm -f "${packbasearg}tmp"-$newpack.*
576 [ -z "$names" ] || echo "$newpack"
577 done
# Success requires a zero pipeline status, no "failed" marker and both the
# "listok" and "packok" markers.
578 [ $? -eq 0 ] && ! [ -e "$failed" ] && [ -e "$listok" ] && [ -e "$packok" ] ||
579 die "unspecified failure"
# --replace: for every "*.$zap" marker in the pack directory remove all files
# that share the marker's base name (this includes the marker itself).
580 if [ -n "$zap" ]; then
581 (cd "$gdo" && [ -d "pack" ] && find -L "pack" -maxdepth 1 -type f -name "*.$zap" -print) |
582 while read -r remove; do
583 rm -f "$gdo/${remove%.$zap}".*
584 done
# Tell cleanup_on_exit that the run succeeded.
586 >"$success"