combine-packs.sh: utility to combine .pack files
[girocco.git] / jobd / combine-packs.sh
blob5ac381ad19e05b3c3b4961ca1126557825c6adeb
1 #!/bin/sh
3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016 Kyle J. McKay. All rights reserved
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Version 1.1.0
# Help text printed by -h/--help.  The text deliberately starts with a
# newline right after the opening quote; the option parser prints
# "${USAGE#?}" to strip that first character again.
USAGE='
printf "%s\n" path-to-pack[.idx|.pack] ... |
combine-packs [option]... [pack-objects option]... [pack-base-name]

NOTE: The following options MUST be given before any pack-objects options:

  --replace         on success, remove the input packs, see note below

  --names           output the 40-char hex sha1 plus "\n" to stdout for each
                    newly created pack(s), if any

  --ignore-missing  silently ignore input pack file names that do not exist

  --objects         input is a list of object hash id values instead of packs

If --replace is given, ALL packs to be combined MUST be located in
the objects/pack subdirectory of the current git directory AND the output
pack base MUST also be omitted (meaning it defaults to objects/pack/pack).

Note that if --objects is used then --replace and --ignore-missing are invalid
and any missing input objects are always silently ignored.

A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
the current git directory (as output by `git rev-parse --git-dir`).

If a <pack-name> does not exist and contains no "/" characters then it is
retried as objects/pack/<pack-name> instead.

Packs to be combined MUST have an associated .idx file.

The pack-base-name may be a relative path name and if so, is ALWAYS relative
to the current git directory.

If not given, then the pack-base-name defaults to objects/pack/pack
relative to the current git directory.

Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
the ONLY option that is automatically passed (but remember that --reuse-delta
and --reuse-object are IMPLIED and must be explicitly disabled if desired).

The options --revs, --unpacked, --all, --reflog, --indexed-objects and
--stdout are forbidden.  Although --keep-true-parents is allowed it should
not have any effect at all.  Using --incremental is recommended only for
wizards or with --objects as in most other cases it will result in an empty
pack being output.

WARNING: the move_aside logic currently only works when pack-base-name is
completely omitted!
'
# Abort on any unhandled command failure.
set -e
# Remember the process id of the top-level shell so that die() can
# signal it even when called from inside a sub shell.
cp_pid=$$
# Perl filter run as `perl -e "$perlprog"` on the output of
# `git cat-file --batch` for the tag objects.
#
# Input:  a stream of "<hash> <type> <size>" header lines, each followed by
#         <size> bytes of object data and one trailing newline.
# Output: one "<hash> <tagname>" line per tag object, ordered by the
#         tagger timestamp, newest first.  Non-tag objects are skipped.
#
# Only the tag header (up to the first blank line, or until both the
# "tag" and "tagger" lines have been seen) is parsed; the rest of each
# object is read and thrown away by discard().
perlprog='
#!/usr/bin/perl
#line 77 "combine-packs.sh"
use strict;
use warnings;

sub discard {
	my $count = shift;
	my $x = "";
	while ($count >= 32768) {
		read(STDIN, $x, 32768);
		$count -= 32768;
	}
	read(STDIN, $x, $count) if $count;
}

my @tags = ();
binmode STDIN;
while (<STDIN>) {
	if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
		my ($h, $t, $l) = ($1, $2, $3);
		my $te = 0;
		my $tn = "";
		discard(1 + $l), next unless $2 eq "tag";
		my $count = 0;
		while (<STDIN>) {
			$count += length($_);
			chomp;
			last if /^$/;
			$tn = $1 if /^tag ([^ ]+)$/;
			$te = $1 if /^tagger [^>]+> ([0-9]+)/;
			last if $tn && $te;
		}
		discard(1 + $l - $count);
		push(@tags, [$te, "$h $tn\n"]);
	}
}
print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
'
# Locate the current git directory and turn it into an absolute,
# symlink-free path; every later path comparison is made against it.
gd="$(git rev-parse --git-dir)"
gd="$(cd "$gd" && pwd -P)"
# Some platforms' broken xargs runs the command always at least once even if
# there's no input unless given a special option.  Probe for that here: on
# such a platform the probe below runs "echo -r" and therefore captures the
# needed "-r" option; on a well-behaved platform nothing runs and the
# captured value is empty.
xargs_r="$(: | command xargs echo -r)"

# Wrapper that automatically supplies the probed option (if any) to the
# real xargs.  $xargs_r is intentionally left unquoted so that an empty
# value adds no argument at all.
# shellcheck disable=SC2086
xargs() { command xargs $xargs_r "$@"; }
# Suffix of the marker files created for --replace; empty when --replace
# was not requested.
zap=

# Exit handler: remove the temporary working directory ($td) if one was
# created and, when --replace is active ($zap non-empty), delete every
# leftover "*.$zap" marker file below "$gd/objects/pack".
cleanup_on_exit() {
	if [ -n "$td" ] && [ -e "$td" ]; then
		rm -rf "$td"
	fi
	if [ -n "$gd" ] && [ -n "$zap" ]; then
		find "$gd/objects/pack" -name "*.$zap" -print0 | xargs -0 rm -f
	fi
}
# Always run the cleanup handler when the shell exits, and turn the usual
# termination signals into a normal exit (status 128 + signal number) so
# that the EXIT handler runs for them as well.
trap cleanup_on_exit EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 143' TERM
# Print "combine-packs: fatal: <message>" to stderr and abort.
# Arguments: the words of the message (joined with spaces).
# Never returns: exits the current (sub)shell with status 1 after
# signalling the top-level shell whose pid is stored in $cp_pid.
die() {
	printf '%s\n' "combine-packs: fatal: $*" >&2
	# In case we are in a sub shell force the entire command to exit
	# The trap on TERM will make sure cleanup still happens in this case
	kill "$cp_pid"
	exit 1
}
# Shell glob patterns: one lowercase hex byte, four of them, and twenty of
# them (i.e. a 40-character lowercase hex object/pack name).
octet='[0-9a-f][0-9a-f]'
octet4="$octet$octet$octet$octet"
octet20="$octet4$octet4$octet4$octet4$octet4"
# Flags set by the combine-packs specific options below.
names=
ignoremiss=
objectlist=

# Consume the leading combine-packs options.  Parsing stops at the first
# argument that is not one of them; everything left in "$@" is treated as
# pack-objects options plus an optional pack-base-name.
while [ $# -ge 1 ]; do case "$1" in
	--names)
		names=1
		shift
		;;
	--replace)
		# The process id makes the marker-file suffix unique per run.
		zap="zap-$$"
		shift
		;;
	--ignore-missing)
		ignoremiss=1
		shift
		;;
	-h|--help)
		# USAGE starts with a newline; strip that first character.
		printf '%s' "${USAGE#?}"
		exit 0
		;;
	--objects)
		objectlist=1
		shift
		;;
	*)
		break
		;;
esac; done
# --objects cannot be combined with --replace or --ignore-missing.
if [ -n "$ignoremiss$zap" ] && [ -n "$objectlist" ]; then
	die "invalid options"
fi
lastarg=
lastargopt=
packbase=
packbasearg=
nonopts=0

# Validate the arguments that remain after the combine-packs options.
# Arguments: the remaining command line (pack-objects options and, at
#            most, one trailing pack-base-name).
# Globals written: arg, lastarg (last argument seen), lastargopt (non-empty
#            when the last argument is an option), nonopts (number of
#            arguments that do not start with "-").
# Calls die when a combine-packs option shows up late, when a forbidden
# pack-objects option is given (the list matches the one in USAGE, which
# includes --stdout), when more than one non-option is present, or when
# the single non-option is empty or is not the final argument.
check_pack_objects_args() {
	for arg; do
		lastarg="$arg"
		lastargopt=1
		case "$arg" in
		--replace|--names|--ignore-missing|-h|--help|--objects)
			die "invalid options"
			;;
		--revs|--unpacked|--all|--reflog|--indexed-objects|--stdout)
			die "forbidden pack-objects options"
			;;
		-*)
			# any other option is handed to git pack-objects untouched
			:
			;;
		*)
			lastargopt=
			nonopts=$(( $nonopts + 1 ))
		esac
	done
	if [ "$nonopts" -gt 1 ]; then
		die "invalid options"
	elif [ "$nonopts" -eq 1 ] && { [ -n "$lastargopt" ] || [ -z "$lastarg" ]; }; then
		die "invalid options"
	fi
}
check_pack_objects_args "$@"
# Work out the base name of the pack(s) to create.
# Globals read:    nonopts, lastarg, gd, zap
# Globals written: pbd, packbase, packbasearg
# packbase is the explicit pack-base-name (when exactly one non-option
# argument was given) or "$gd/objects/pack/pack"; its directory part is
# made absolute and symlink-free and a leading "$gd/" is then removed.
# packbasearg is set (to the same value) ONLY when no pack-base-name was
# given on the command line.
# Calls die when the directory does not exist or when --replace is used
# with an explicit or out-of-place pack base.
resolve_packbase() {
	if [ "$nonopts" -eq 1 ]; then
		packbase="$lastarg"
	else
		packbase="$gd/objects/pack/pack"
	fi
	pbd="$(dirname "$packbase")"
	[ -d "$pbd" ] || die "no such directory: $pbd"
	packbase="$(cd "$pbd" && pwd -P)/$(basename "$packbase")"
	pbd="$(dirname "$packbase")"
	[ -d "$pbd" ] || die "internal failure realpathing: $packbase"
	case "$packbase" in "$gd"/?*)
		# "$gd" is quoted so that it is removed as a literal string and
		# not interpreted as a glob pattern
		packbase=${packbase#"$gd"/}
	esac
	[ "$nonopts" -eq 1 ] || packbasearg="$packbase"
	if [ -n "$zap" ] && [ -z "$packbasearg" ]; then
		die "--replace does not allow specifying pack-base"
	fi
	if [ -n "$zap" ] && [ "$(dirname "$packbase")" != "objects/pack" ]; then
		die "--replace and pack base dir not <git-dir>/objects/pack"
	fi
}
resolve_packbase
# Private scratch directory inside the git directory (removed again by
# cleanup_on_exit).  The work files below are named relative to the git
# directory, i.e. "<scratch-dir-name>/<file>".
td="$(mktemp -d "$gd/cmbnpcks-XXXXXX")"
tdmin="$(basename "$td")"
cm="$tdmin/commits"		# commit object ids
tg="$tdmin/tags"		# tag object ids
tr="$tdmin/trees"		# tree object ids
bl="$tdmin/blobs"		# blob object ids
trbl="$tdmin/treesblobs"	# sorted union of trees and blobs
named="$tdmin/named"		# trees/blobs that rev-list reported from commits
# Succeed only when both "$1.idx" and "$1.pack" are non-empty regular files.
_pack_pair_exists() {
	[ -f "$1.idx" ] && [ -s "$1.idx" ] && [ -f "$1.pack" ] && [ -s "$1.pack" ]
}

# Turn one input pack name into the canonical base name of the pack (the
# path without the ".idx"/".pack" suffix) and print it on stdout.
# Arguments: $1 - 40-char lowercase hex name, or a path optionally ending
#                 in ".idx" or ".pack"
# Globals read: gd, octet20, ignoremiss, zap
# Output:  the base name, relative to the git directory when the pack
#          lives in a directory below it, otherwise absolute.  Nothing is
#          printed for a missing pack when --ignore-missing is active.
# Returns: 0 on success; calls die when the pack is missing (without
#          --ignore-missing) or, under --replace, is not located in
#          <git-dir>/objects/pack.
get_pack_base() {
	_name="$1"
	case "$_name" in
	$octet20)
		_name="$gd/objects/pack/pack-$_name"
		;;
	*.idx)
		_name="${_name%.idx}"
		;;
	*.pack)
		_name="${_name%.pack}"
		;;
	esac
	if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
		# a bare name that does not exist is retried in objects/pack
		case "$_name" in */*) :;; *)
			_name="$gd/objects/pack/$_name"
		esac
	fi
	if ! _pack_pair_exists "$_name"; then
		[ -z "$ignoremiss" ] || return 0
		die "no such pack found matching: $1"
	fi
	_name="$(cd "$(dirname "$_name")" && pwd -P)/$(basename "$_name")"
	if ! _pack_pair_exists "$_name"; then
		die "internal failure realpathing: $1"
	fi
	case "$(dirname "$_name")" in "$gd"/?*)
		# "$gd" is quoted so that it is removed as a literal string and
		# not interpreted as a glob pattern
		_name=${_name#"$gd"/}
	esac
	if [ -n "$zap" ] && [ "$(dirname "$_name")" != "objects/pack" ]; then
		die "--replace and pack not in <git-dir>/objects/pack: $1"
	fi
	printf '%s\n' "$_name"
	return 0
}
# add "old" prefix to passed in existing files, but be careful to hard-link
# ALL the files to be renamed to the renamed name BEFORE removing anything
# Arguments: the path names to move aside; names that are not existing
#            regular files are skipped.
# Returns:   0 on success, 1 (before anything has been removed) when a
#            hard link could not be created.
move_aside() {
	for _f; do
		if [ -f "$_f" ]; then
			ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")" || return 1
		fi
	done
	for _f; do
		if [ -f "$_f" ]; then
			rm -f "$_f"
		fi
	done
	return 0
}
# Remember where we were started: input pack names are resolved relative
# to that directory, everything else happens inside the git directory.
origdir="$PWD"
cd "$gd"
# Make sure the per-type lists exist even when no object of a type shows up.
>"$cm"
>"$tr"
>"$bl"
# Read the input (object ids with --objects, pack names otherwise), ask
# git for the type of every object and split the ids by type: commits,
# tags and blobs go straight into their list files, trees are sorted and
# de-duplicated into "$tr".  Lines whose second field is none of the four
# types are dropped.
if [ -n "$objectlist" ]; then
	git cat-file --batch-check='%(objectname) %(objecttype)'
else
	# Anything after the first ":" or space on an input line is ignored.
	while IFS=': ' read -r packraw junk; do
		pack="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack: $packraw")"
		if [ -n "$pack" ]; then
			# with --replace, mark the pack for removal on success
			[ -z "$zap" ] || >"$pack.$zap"
			git show-index <"$pack.idx"
		fi
	done | cut -d ' ' -f 2 |
	git cat-file --batch-check='%(objectname) %(objecttype)'
fi | awk -v blobs="$bl" -v commits="$cm" -v tags="$tg" '{
	if ($2=="tree") print $1
	else if ($2=="blob") print $1 >blobs
	else if ($2=="commit") print $1 >commits
	else if ($2=="tag") print $1 >tags
}' | LC_ALL=C sort -u >"$tr"
LC_ALL=C sort -u "$tr" "$bl" >"$trbl"
# List the objects of the commits, number the lines in output order and
# keep the lines whose object id is one of our trees or blobs; the result
# ("<id> <line-number> <rest of the rev-list line>") is stored in "$named".
git rev-list --no-walk --objects --stdin <"$cm" |
LC_ALL=C awk '{print NR " " $0}' |
LC_ALL=C sort -k2,2 |
LC_ALL=C join -t " " -1 2 - "$trbl" >"$named"
# When no pack-base-name was given on the command line, supply the default
# one (with a "tmp" suffix) as the final pack-objects argument; the new
# pack is moved into place below once pack-objects has finished.
[ -z "$packbasearg" ] || set -- "$@" "${packbasearg}tmp"
# Feed pack-objects its object list in this order: commits, tags (ordered
# by the perl helper), the entries of "$named" in rev-list order (with the
# line-number column removed), the objects reached from the trees that are
# not in "$named", and finally all blobs.
{
	cat "$cm"
	! [ -s "$tg" ] || git cat-file --batch <"$tg" | perl -e "$perlprog"
	LC_ALL=C sort -k2,2n <"$named" |
	LC_ALL=C sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
	LC_ALL=C join -t " " -v 1 "$tr" "$named" |
	git rev-list --no-walk --objects --stdin
	cat "$bl"
} |
{ git pack-objects --delta-base-offset "$@" || die "git pack-objects failed"; } |
while read -r newpack; do
	# each line read here is the name (hash) of one newly written pack
	if [ -n "$packbasearg" ]; then
		# the glob is intentional; a pattern that matches nothing is
		# skipped by move_aside because it is not an existing file
		move_aside "$packbasearg-$newpack".*
		ln -f "${packbasearg}tmp-$newpack.pack" "$packbasearg-$newpack.pack"
		ln -f "${packbasearg}tmp-$newpack.idx" "$packbasearg-$newpack.idx"
		rm -f "${packbasearg}tmp-$newpack".*
	fi
	[ -z "$names" ] || printf '%s\n' "$newpack"
done
# --replace: every "<pack>.$zap" marker names an input pack; remove all of
# that pack's files (the marker itself included).
if [ -n "$zap" ]; then
	find objects/pack -maxdepth 1 -type f -name "*.$zap" -print |
	while IFS= read -r remove; do
		rm -f "${remove%".$zap"}".*
	done
fi