fast-export.git: pick up latest and adopt changes
[girocco.git] / jobd / combine-packs.sh
blob92e34cf4c538d738d1e2ab2cf0dac0c3b6baf5dd
1 #!/bin/sh
3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Version 1.1.8
# Help text for -h/--help.  It is printed with "${USAGE#?}", i.e. with its
# first character (the newline right after the opening quote) removed.
21 USAGE='
22 printf "%s\n" path-to-pack[.idx|.pack] ... |
23 combine-packs [option]... [pack-objects option]... [pack-base-name]
25 NOTE: The following options MUST be given before any pack-objects options:
27 --replace on success, remove the input packs, see note below
28 (but any input packs with a .keep are never removed)
30 --names output the 40-char hex sha1 plus "\n" to stdout for each
31 newly created pack(s), if any
33 --ignore-missing silently ignore input pack file names that do not exist
35 --objects input is a list of object hash id values instead of packs
37 If --replace is given, ALL packs to be combined MUST be located in
38 the objects/pack subdirectory of the current git directory AND the output
39 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
41 Note that if --objects is used then --replace and --ignore-missing are invalid
42 and any missing input objects are always silently ignored.
44 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
45 the current git directory (as output by `git rev-parse --git-dir`).
47 If a <pack-name> does not exist and contains no "/" characters then it is
48 retried as objects/pack/<pack-name> instead.
50 Packs to be combined MUST have an associated .idx file.
52 The pack-base-name may be a relative path name and if so, is ALWAYS relative
53 to the current git directory.
55 If not given, then the pack-base-name defaults to objects/pack/pack
56 relative to the current git directory.
58 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
59 the ONLY option that is automatically passed (but remember that --reuse-delta
60 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
62 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
63 --stdout are forbidden. Although --keep-true-parents is allowed it should
64 not have any effect at all. Using --incremental is recommended only for
65 wizards or with --objects as in most other cases it will result in an empty
66 pack being output.
68 WARNING: the move_aside logic currently only works when pack-base-name is
69 completely omitted!
# Abort on the first unhandled command failure.
72 set -e
74 # $$ should be the same in subshells, but just in case, remember it
# die() sends a signal to this pid so that a failure inside a subshell still
# stops the whole script.
75 cp_pid=$$
# Perl filter, run later as `perl -e "$perlprog"` on the output of
# `git cat-file --batch` for the collected tag ids.
# For every "<hash> <type> <size>" header it skips the object body unless the
# type is "tag".  For a tag it scans the header lines for the "tag <name>"
# and "tagger ... <timestamp>" fields, skips the rest of the object and
# remembers the pair.  At the end it prints "<hash> <name>" lines sorted by
# tagger timestamp in descending order (a tag without a tagger line keeps
# timestamp 0).
# Nothing may be inserted inside the string: it carries its own #line
# directive so Perl diagnostics refer to line numbers of this file.
77 perlprog='
78 #!/usr/bin/perl
79 #line 80 "combine-packs.sh"
80 use strict;
81 use warnings;
83 sub discard {
84 my $count = shift;
85 my $x = "";
86 while ($count >= 32768) {
87 read(STDIN, $x, 32768);
88 $count -= 32768;
90 read(STDIN, $x, $count) if $count;
93 my @tags = ();
94 binmode STDIN;
95 while (<STDIN>) {
96 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
97 my ($h, $t, $l) = ($1, $2, $3);
98 my $te = 0;
99 my $tn = "";
100 discard(1 + $l), next unless $2 eq "tag";
101 my $count = 0;
102 while (<STDIN>) {
103 $count += length($_);
104 chomp;
105 last if /^$/;
106 $tn = $1 if /^tag ([^ ]+)$/;
107 $te = $1 if /^tagger [^>]+> ([0-9]+)/;
108 last if $tn && $te;
110 discard(1 + $l - $count);
111 push(@tags, [$te, "$h $tn\n"]);
114 print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
# On some broken platforms running xargs without -r and empty input runs the
# command.  Probe for that: give xargs empty input and let it run `echo -r`.
# If the command is run anyway, "-r" is captured here; if it is not run, the
# capture is empty and no extra option is needed.
xargs_r="$(: | command xargs echo -r)"

# Some platforms' broken xargs runs the command always at least once even if
# there's no input unless given a special option.  Automatically supply the
# option on those platforms by providing an xargs function.
# $xargs_r is deliberately unquoted: when empty it must expand to no argument.
xargs() {
	command xargs $xargs_r "$@"
}
# Suffix of the marker files used by --replace; empty means "no markers".
zap=

# EXIT trap handler.
# - removes the temporary work directory "$td" if it exists
# - removes leftover "*.$zap" marker files from "$gd/objects/pack" when both
#   $gd and $zap are set
# - prints a failure notice on stderr unless the flag file "$td/success"
#   existed when the handler started
# Every step ends in "|| :" so that cleanup itself can never abort the
# handler while set -e is active.
cleanup_on_exit() {
	ewf=
	# must be evaluated before $td is removed below
	[ -n "$td" ] && [ -e "$td/success" ] || ewf=1
	[ -z "$td" ] || ! [ -e "$td" ] || rm -rf "$td" || :
	if [ -n "$gd" ] && [ -n "$zap" ]; then
		command find "$gd/objects/pack" -maxdepth 1 -type f -name "*.$zap" -print0 |
		xargs -0 rm -f || :
	fi
	# same "combine-packs:" prefix as the messages printed by die()
	[ -z "$ewf" ] || echo "combine-packs: exiting with failure" >&2 || :
}
# Run the cleanup handler on every exit path.
136 trap cleanup_on_exit EXIT
# Convert these signals into an ordinary exit with status 128 + signal number
# (HUP=1, INT=2, QUIT=3, TERM=15) so that the EXIT trap above still runs.
137 trap 'exit 129' HUP
138 trap 'exit 130' INT
139 trap 'exit 131' QUIT
140 trap 'exit 143' TERM
# Print "combine-packs: fatal: <message>" on stderr and terminate the script.
# Arguments: the message words (joined with spaces).
# Globals read: cp_pid (pid of the main script), td (work directory).
# Never returns; exits with status 1.
die() {
	# printf rather than echo: the message may contain backslashes or other
	# text that some echo implementations would interpret
	printf '%s\n' "combine-packs: fatal: $*" >&2 || :
	# In case we are in a sub shell force the entire command to exit
	# The trap on TERM will make sure cleanup still happens in this case
	extrapid=
	# "command cat" bypasses the cat() wrapper, whose failure path would
	# call die again
	[ -z "$td" ] || [ ! -s "$td/popid" ] || extrapid=$(command cat "$td/popid" || :)
	# unquoted on purpose: an empty $extrapid must not become an argument
	kill $cp_pid $extrapid || :
	exit 1
}
# Run "$@" as an external program with LC_ALL=C and return its exit status.
#
# This extra indirection shouldn't be necessary, but it is for some broken sh
# in order for a failure to not prematurely exit die_on_fail with set -e active
#
# The body is a subshell, so the locale change and the exec stay contained.
# A plain "exec" is used (not "exec command"): exec only ever runs external
# programs, so the same-named wrapper functions are bypassed, and it does not
# depend on an external "command" utility being installed on PATH.
do_command() (
	LC_ALL=C
	export LC_ALL
	exec "$@"
)
# Run a command through do_command and stop the script if it fails.
# Arguments: the command and its arguments.
# On failure the flag file "$td/failed" is created first (when $td is set),
# which the main script checks after its pipelines, and then die() is called
# with the failed command line.
die_on_fail() {
	if ! do_command "$@"; then
		[ -z "$td" ] || >"$td/failed" || :
		die "failed command: $*"
	fi
}
# These commands may be the non-final member of a pipe and
# MUST NOT be allowed to silently fail without consequence
# Each wrapper hands its own name plus all arguments to die_on_fail.
awk() {
	die_on_fail awk "$@"
}
cat() {
	die_on_fail cat "$@"
}
cut() {
	die_on_fail cut "$@"
}
find() {
	die_on_fail find "$@"
}
git() {
	die_on_fail git "$@"
}
join() {
	die_on_fail join "$@"
}
perl() {
	die_on_fail perl "$@"
}
sed() {
	die_on_fail sed "$@"
}
sort() {
	die_on_fail sort "$@"
}
# Shell patterns matching 2, 8 and 40 lowercase hex digits respectively.
177 octet='[0-9a-f][0-9a-f]'
178 octet4="$octet$octet$octet$octet"
179 octet20="$octet4$octet4$octet4$octet4$octet4"
# Option state; each variable stays empty until its option is seen.
181 names=
182 ignoremiss=
183 objectlist=
184 dozap=
# Consume the leading combine-packs options.  The loop ends at the "break",
# which leaves the remaining arguments untouched in "$@".
186 while [ $# -ge 1 ]; do case "$1" in
187 --names)
188 names=1
189 shift
191 --replace)
# marker-file suffix made unique with the pid of this script
192 dozap="zap-$$"
193 shift
195 --ignore-missing)
196 ignoremiss=1
197 shift
199 -h|--help)
# print the usage text, then exit without running the EXIT trap handler
200 printf '%s' "${USAGE#?}"
201 trap - EXIT
202 exit 0
204 --objects)
205 objectlist=1
206 shift
209 break
211 esac; done
# --objects cannot be combined with --ignore-missing or --replace
212 [ -z "$ignoremiss$dozap" -o -z "$objectlist" ] || die "invalid options"
214 # Always make sure we get the specified objects
215 GIT_NO_REPLACE_OBJECTS=1
216 export GIT_NO_REPLACE_OBJECTS
217 gd="$(git rev-parse --git-dir)"
# Reduce the `git version` output to its leading run of digits and dots.
218 gv="$(git version)"
219 gv="${gv#[Gg]it version }"
220 gv="${gv%%[!0-9.]*}"
# Split on "." into major/minor/patch.
# NOTE(review): the here-document body is not visible here; presumably it
# supplies $gv -- confirm.
221 IFS=. read -r gvmaj gvmin gvpat <<EOT
# any component that is still empty defaults to 0
224 : ${gvmaj:=0} ${gvmin:=0} ${gvpat:=0}
225 # gcfbo is Git Cat-File --Buffer Option :)
226 gcfbo=
# --buffer is only passed to git cat-file for Git 2.6 and later
227 if [ $gvmaj -gt 2 ] || [ $gvmaj -eq 2 -a $gvmin -ge 6 ]; then
228 gcfbo=--buffer
# Turn the git directory into an absolute physical path.
230 gd="$(cd "$gd" && pwd -P)" || die "cd failed: $gd"
231 [ -d "$gd/objects/pack" ] || die "no such directory: $gd/objects/pack"
# With both $gd and $zap set, the EXIT handler now also removes "*.$zap" files.
232 zap="$dozap"
# Validate the remaining arguments (they are later passed on to
# git pack-objects) and work out the output pack base name.
234 lastarg=
235 lastargopt=
236 packbase=
237 packbasearg=
238 nonopts=0
# For every argument remember it as the last one seen and assume it is an
# option; the arm that clears lastargopt also counts it as a non-option.
239 for arg; do
240 lastarg="$arg"
241 lastargopt=1
242 case "$arg" in
# combine-packs' own options are only valid before any other argument
243 --replace|--names|--ignore-missing|-h|--help|--objects)
244 die "invalid options"
246 --revs|--unpacked|--all|--reflog|--indexed-objects)
247 die "forbidden pack-objects options"
253 lastargopt=
254 nonopts=$(( $nonopts + 1 ))
255 esac
256 done
# Reject more than one non-option argument, a single non-option argument
# that is not the last argument, or one that is the empty string.
257 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] || [ $nonopts -eq 1 -a -n "$lastargopt" ] || \
258 [ $nonopts -eq 1 -a -z "$lastarg" ]; then
259 die "invalid options"
# The single non-option argument, if present, is the pack base name.
261 if [ $nonopts -eq 1 ]; then
262 packbase="$lastarg"
263 else
264 packbase="$gd/objects/pack/pack"
# The directory part must exist; resolve it to a physical path and check again.
266 pbd="$(dirname "$packbase")"
267 [ -e "$pbd" -a -d "$pbd" ] || die "no such directory: $packbase"
268 packbase="$(cd "$(dirname "$packbase")" && pwd -P)/$(basename "$packbase")"
269 pbd="$(dirname "$packbase")"
270 [ -e "$pbd" -a -d "$pbd" ] || die "internal failure realpathing: $packbase"
# A pack base below the git directory is made relative to it.
271 case "$packbase" in "$gd"/?*)
272 packbase="${packbase#$gd/}"
273 esac
# packbasearg is set only when NO pack base was given on the command line;
# in that case the script itself appends "${packbasearg}tmp" to the
# pack-objects command later on.
274 [ $nonopts -eq 1 ] || packbasearg="$packbase"
# With --replace an explicitly given pack base is rejected.
275 [ -z "$zap" -o -n "$packbasearg" ] || die "--replace does not allow specifying pack-base"
276 if [ -n "$zap" ] && [ "$(dirname "$packbase")" != "objects/pack" ]; then
277 die "--replace and pack base dir not <git-dir>/objects/pack" >&2
# Private work directory inside the git directory; the EXIT handler removes it.
280 td="$(mktemp -d "$gd/cmbnpcks-XXXXXX")"
# Its bare name, used to build paths relative to the git directory.
281 tdmin="$(basename "$td")"
# Flag files (absolute paths); their mere existence is the signal.
282 failed="$td/failed"
283 listok="$td/listok"
284 packok="$td/packok"
# file in which the pack-objects child records its pid (read by die())
285 popid="$td/popid"
286 success="$td/success"
# Object id lists, as paths relative to the git directory (the script
# changes into "$gd" before it writes them).
287 cm="$tdmin/commits"
288 tg="$tdmin/tags"
289 tr="$tdmin/trees"
290 bl="$tdmin/blobs"
291 trbl="$tdmin/treesblobs"
292 named="$tdmin/named"
# Resolve one input pack name to its canonical base path (the path without
# the .idx/.pack suffix) and print that path on stdout.
#
# $1 - a 40-char lowercase hex id (taken as $gd/objects/pack/pack-<id>), or
#      a pack path with or without a .idx or .pack suffix.  A name that does
#      not exist as given and contains no "/" is retried in $gd/objects/pack.
#
# Both the .idx and the .pack file must be non-empty regular files.  The
# printed path has its directory resolved to a physical path and is relative
# to $gd when the pack lives below $gd.
#
# Globals read: gd, octet20, ignoremiss, zap.
# Returns 0 printing nothing for a missing pack when $ignoremiss is set;
# every other problem is fatal via die().
get_pack_base() {
	_name="$1"
	case "$_name" in
	$octet20)
		_name="$gd/objects/pack/pack-$_name"
		;;
	*.idx)
		_name="${_name%.idx}"
		;;
	*.pack)
		_name="${_name%.pack}"
		;;
	esac
	if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
		case "$_name" in
		*/*)
			:
			;;
		*)
			_name="$gd/objects/pack/$_name"
			;;
		esac
	fi
	if ! { [ -f "$_name.idx" ] && [ -s "$_name.idx" ] &&
		[ -f "$_name.pack" ] && [ -s "$_name.pack" ]; }; then
		[ -z "$ignoremiss" ] || return 0
		die "no such pack found matching: $1"
	fi
	_name="$(cd "$(dirname "$_name")" && pwd -P)/$(basename "$_name")"
	if ! { [ -f "$_name.idx" ] && [ -s "$_name.idx" ] &&
		[ -f "$_name.pack" ] && [ -s "$_name.pack" ]; }; then
		die "internal failure realpathing: $1"
	fi
	case "$(dirname "$_name")" in "$gd"/?*)
		# $gd is quoted inside the expansion so that it is removed as a
		# literal prefix even if it contains pattern characters
		_name="${_name#"$gd"/}"
	esac
	if [ -n "$zap" ] && [ "$(dirname "$_name")" != "objects/pack" ]; then
		die "--replace and pack not in <git-dir>/objects/pack: $1"
	fi
	printf '%s\n' "$_name"
	return 0
}
# add "old" prefix to passed in existing files, but be careful to hard-link
# ALL the files to be renamed to the renamed name BEFORE removing anything
#
# Arguments: any number of paths; those that are not regular files are
# skipped (this includes an unmatched glob pattern passed through literally).
# Each dir/name is linked as dir/oldname, then the original names are removed.
# Returns 0 on success.  Returns 1 as soon as a link cannot be created (in
# which case nothing has been removed) or a file is still present after its
# removal.
move_aside() {
	for _f; do
		[ -f "$_f" ] || continue
		ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")" || return 1
	done
	for _f; do
		[ -f "$_f" ] || continue
		rm -f "$_f" || return 1
		# An explicit check is required: the status of a "!"-negated
		# command is ignored by set -e, so "! test -f" alone would let
		# a surviving file go unnoticed.
		if [ -f "$_f" ]; then
			return 1
		fi
	done
	return 0
}
# Collect the ids of all objects to pack, split by type into list files.
# The original directory is remembered because input pack names are resolved
# relative to it; all list file names are relative to the git directory.
346 origdir="$PWD"
347 cd "$gd"
# start with empty commit, tree and blob lists
348 >"$cm"
349 >"$tr"
350 >"$bl"
# --objects: stdin already is a list of object ids, ask for their types.
351 if [ -n "$objectlist" ]; then
352 git cat-file $gcfbo --batch-check='%(objectname) %(objecttype)'
353 else
# --replace: remove marker files with this run's suffix before creating new ones
354 [ -z "$zap" ] || find objects/pack -maxdepth 1 -type f -name "*.$zap" -print0 | xargs -0 rm -f || :
# Read one pack name per stdin line; only the text before the first ":" or
# space is used, the rest of the line is ignored.
355 while IFS=': ' read -r packraw junk; do
356 pack="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack: $packraw")"
# an empty result means the pack was missing and --ignore-missing is active
357 if [ -n "$pack" ]; then
# --replace: mark the pack for later removal unless it has a .keep file
358 [ -z "$zap" ] || [ -e "$pack.keep" ] || >"$pack.$zap"
359 git show-index <"$pack.idx"
# field 2 of every show-index line is passed on to cat-file as the object id
361 done | cut -d ' ' -f 2 |
362 git cat-file $gcfbo --batch-check='%(objectname) %(objecttype)'
# Route the ids by type: trees continue down the pipe, while blobs, commits
# and tags are written to their own list files.  The tree list is sorted and
# de-duplicated.
363 fi | awk '{
364 if ($2=="tree") print $1
365 else if ($2=="blob") print $1 >"'"$bl"'"
366 else if ($2=="commit") print $1 >"'"$cm"'"
367 else if ($2=="tag") print $1 >"'"$tg"'"
368 }' | sort -u >"$tr"
# sorted union of all tree and blob ids
369 cat "$tr" "$bl" | sort -u >"$trbl"
# List the objects named by the commits, prefix every output line with its
# line number, sort by object id and keep only the lines whose id is in the
# tree/blob union.  The line number is kept so the listing order can be
# restored later.
370 git rev-list --no-walk --objects --stdin <"$cm" |
371 awk '{print NR " " $0}' |
372 sort -k2,2 |
373 join -t " " -1 2 - "$trbl" >"$named"
# Command line for the packer; "$@" is expanded later by eval inside sh -c.
374 pocmd='git pack-objects --delta-base-offset "$@"'
# When no pack base was given, pack to "<default base>tmp" so the results can
# be moved into place afterwards.
375 [ -z "$packbasearg" ] || pocmd="$pocmd \"${packbasearg}tmp\""
# Emit the object list in this order: commits, tags (as ordered by the Perl
# filter), the named trees/blobs in their original listing order, the objects
# reachable from the trees that got no name, and finally all blobs.
377 cat "$cm"
378 ! [ -s "$tg" ] || git cat-file $gcfbo --batch <"$tg" | perl -e "$perlprog"
# restore the listing order (numeric sort on field 2), then drop that field
379 sort -k2,2n <"$named" |
380 sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
# lines of the tree list that have no match in the named list
381 join -t " " -v 1 "$tr" "$named" |
382 git rev-list --no-walk --objects --stdin
383 cat "$bl"
# flag: the complete list was written
384 >"$listok"
385 } | {
# The child shell writes its own pid to the popid file, so die() can signal
# it, and then replaces itself with the pack-objects command.
386 sh -c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh "$popid" "$pocmd" "$@" || {
387 rm -f "$popid"
388 die "git pack-objects failed"
390 rm -f "$popid"
# flag: pack-objects finished successfully
391 >"$packok"
# NOTE(review): presumably reads the pack hashes printed by the pack-objects
# stage above -- confirm against the complete file.
393 while read -r newpack; do
# Default pack base only: move any existing files with the final name aside,
# link the temporary pack and index to their final names and remove the
# temporary names.
394 if [ -n "$packbasearg" ]; then
395 move_aside "$packbasearg"-$newpack.*
396 ln -f "${packbasearg}tmp"-$newpack.pack "$packbasearg"-$newpack.pack
397 ln -f "${packbasearg}tmp"-$newpack.idx "$packbasearg"-$newpack.idx
398 rm -f "${packbasearg}tmp"-$newpack.*
# --names: report the hash of each new pack on stdout
400 [ -z "$names" ] || echo "$newpack"
401 done
# Success requires a zero status, no "failed" flag, and both the "list
# written" and "pack written" flags.
402 [ $? -eq 0 -a ! -e "$failed" -a -e "$listok" -a -e "$packok" ] || die "unspecified failure"
# --replace: for every marker file, remove all files that share its base
# name ("<base>.*"), which includes the marker itself.
403 if [ -n "$zap" ]; then
404 find objects/pack -maxdepth 1 -type f -name "*.$zap" -print |
405 while read -r remove; do
406 rm -f "${remove%.$zap}".*
407 done
# Tell the EXIT handler that the run succeeded.
409 >"$success"