forks: revert to hard-linking objects during gc
[girocco.git] / toolbox / perform-pre-gc-linking.sh
blobaf827bce2eef17662a9d56592910ddd6dcee2e27
1 #!/bin/sh
3 # Perform pre-gc linking of unreachable objects to forks
5 # It may, under unusual circumstances, be desirable to run git gc
6 # manually. However, running git gc on a project that has forks is
7 # dangerous as it can reap objects not in use by the project itself
8 # but which are still in use by one or more forks which do not have
9 # their own copy since they use an alternates file to refer to them.
11 # Note that a .nogc file should really be created during the manual
12 # gc operation!
14 # Running this script on a project BEFORE manually running git gc
15 # on that project prevents this problem from occurring PROVIDED the
16 # "git gc --prune=all" or "git gc --prune=now" or "git repack -a -d -l"
17 # options are NOT used. In other words NEVER prune objects immediately
18 # if the project has ANY forks at all!
20 # During normal gc.sh operations, what this script does is essentially
21 # what happens AFTER "git repack -A -d -l" but BEFORE "git prune". This
22 # script does, however, also touch all the .pack files to make sure nothing
23 # accidentally gets pruned early (just like gc.sh does).
25 # It is enough to run this script before a "git gc" rather than in the
26 # middle (i.e. after "git repack -A -d -l" but before "git prune") although
27 # that could result in forks referring to now-loosened objects in the parent.
28 # This is not a terrible thing and those objects will be propagated into the
29 # forks before any future "git prune" so nothing will be lost.
31 # However, having any child forks depend on loose objects only available via
32 # their alternates as unreachable loose objects in those alternates can easily
33 # be avoided by simply running this script again after running "git gc". In
34 # other words do this:
36 # 1. Run this script on the project
37 # 2. Run "git gc" with any options AVOIDING any that cause immediate pruning
38 # 3. Run this script again on the project (optional but desirable)
40 # Note that running this script WILL make all the project's child forks eligible
41 # for gc at their next interval (i.e. they will not skip running gc even if it
42 # ends up not actually doing anything).
44 # Loose objects are normally just hard-linked into the child forks, but if the
45 # "--single-pack" option is used they will instead be combined into a single
46 # pack and that will be hard-linked into the child forks instead.
# Abort on the first unhandled command failure.
set -e

# Pull in the shared shell library.  The cfg_* settings and the helper
# functions used further down (list_packs, has_forks, get_repo_list, ...)
# are not defined in this script, so they presumably come from here.
. @basedir@/shlib.sh

# Anything created below (directories, hard links) is group-writable.
umask 002

# Option parsing: "--force" may appear before or after "--single-pack".
force=
singlepack=
[ "$1" != "--force" ] || { force=1; shift; }
[ "$1" != "--single-pack" ] || { singlepack=1; shift; }
[ "$1" != "--force" ] || { force=1; shift; }

# Exactly one project name must remain; a trailing ".git" is tolerated.
proj="${1%.git}"
if [ "$#" -ne 1 ] || [ -z "$proj" ]; then
	echo "I need a project name (e.g. \"$(basename "$0") example\")" >&2
	exit 1
fi
# Everything below uses paths relative to the project's repository.
if ! cd "$cfg_reporoot/$proj.git"; then
	echo "no such directory: $cfg_reporoot/$proj.git" >&2
	exit 1
fi

# Refuse to run when gc.pid names both a pid and a host.  A missing or
# unreadable gc.pid is fine, hence the silenced, non-fatal read.
apid=
ahost=
{ read -r apid ahost ajunk < gc.pid; } >/dev/null 2>&1 || :
if [ -n "$apid" ] && [ -n "$ahost" ]; then
	echo "ERROR: refusing to run, $cfg_reporoot/$proj.git/gc.pid file exists" >&2
	echo "ERROR: is gc already running on machine '$ahost' pid '$apid'?" >&2
	exit 1
fi

# Unless --force was given, insist on a .nogc or .bypass marker file.
# Two separate tests are used instead of the obsolescent "-o" operator.
if [ -z "$force" ] && [ ! -e .nogc ] && [ ! -e .bypass ]; then
	echo "WARNING: no .nogc or .bypass file found in $cfg_reporoot/$proj.git" >&2
	echo "WARNING: jobd.pl could run gc.sh while you're fussing with $proj" >&2
	echo "WARNING: either create one of those files or re-run with --force" >&2
	echo "WARNING: (e.g. \"$(basename "$0") --force ${singlepack:+--single-pack }$proj\") to bypass this warning" >&2
	echo "WARNING: please remember to remove the file after you're done fussing" >&2
	exit 1
fi

# date -R is linux-only, POSIX equivalent is '+%a, %d %b %Y %T %z'
datefmt='+%a, %d %b %Y %T %z'
# Run combine-packs.sh with every argument passed through unchanged.
# PATH is extended for that single invocation only, with
# $var_git_exec_path and $cfg_basedir/bin placed in front, to make sure
# combine-packs uses the correct Git executable.
run_combine_packs() {
	PATH="$var_git_exec_path:$cfg_basedir/bin:$PATH" @basedir@/jobd/combine-packs.sh "$@"
}
# From here until the trap is cleared, any exit reports failure on stderr
# and forces exit status 1.
trap 'echo "pre-packing-and-linking failed" >&2; exit 1' EXIT

# The following is taken verbatim from gc.sh (with some whitespace adjustment
# and comment removal and ">.gc_failed" commented out) and should be kept in
# sync with it

# This part touches any packs to make sure loosened objects avoid immediate pruning

# ---- BEGIN DUPLICATED CODE SECTION ONE ----

# Refresh the mtime of every pack listed; failures are ignored on purpose.
list_packs --exclude-no-idx objects/pack | xargs touch -c 2>/dev/null || :
# Pick one pack: the first with a bitmap, else the first match of the
# fallback list_packs query.
bmpack="$(list_packs --exclude-no-bitmap --exclude-no-idx --max-matches 1 objects/pack)"
[ -n "$bmpack" ] || bmpack="$(list_packs --exclude-no-idx --max-matches 1 --object-limit -1 --include-boundary objects/pack)"
if [ -n "$bmpack" ] && [ -f "$bmpack" -a -s "$bmpack" ]; then
	# Touch the chosen pack again one second later so its mtime is newer
	# than the mtimes set above (presumably so it counts as the most
	# recent pack -- confirm against gc.sh).
	sleep 1
	touch -c "$bmpack" 2>/dev/null || :
fi

# ---- END DUPLICATED CODE SECTION ONE ----
118 # This part creates a pack of all loose objects and hard-links it into any children
119 # This logic is no longer used by default but may be selected with the "--single-pack" option
# Combine the project's loose objects into pack(s) via run_combine_packs,
# hard-link those pack(s) into every immediate child fork that has a
# non-empty alternates file, then remove the pack(s) from the parent.
#
# Must be called with the project's repository as the current directory.
# Reads:  proj, cfg_reporoot, octet, octet19, datefmt, packopts, quiet
# Exits the script with status 1 if creating the pack(s) fails.
propagate_single_pack() {
# ---- BEGIN DUPLICATED CODE SECTION TWO ----
# NOTE: differs from the listing only by "read -r" on the fork loop (fork
# names are no longer subject to backslash processing); mirror in gc.sh.

if has_forks "$proj"; then
	# Pack up all the loose objects and copy (actually hard link) them into all the forks
	progress "~ [$proj] creating pack of loose objects for forks"
	# find prints objects/XX/YYYY...; awk joins XX and YYYY... into the
	# object name fed to combine-packs.  $packopts and $quiet are left
	# unquoted so an empty value contributes no argument (neither is
	# assigned in the visible part of this script).
	lpacks="$(find objects/$octet -maxdepth 1 -type f -name "$octet19" -print 2>/dev/null |
		LC_ALL=C awk -F / '{print $2 $3}' |
		run_combine_packs --objects --names $packopts --incremental --all-progress-implied $quiet --non-empty)" || {
		#>.gc_failed
		exit 1
	}
	# We have to update the lastparentgc time in the child forks even if they do not get any
	# new "loose objects" pack(s) because they need to run gc just in case the parent now has
	# some objects that used to only be in the child so they can be removed from the child.
	# For example, a "patch" might be developed first in a fork and then later accepted into
	# the parent in which case the objects making up the patch in the child fork are now
	# redundant (since they're now in the parent as well) and need to be removed from the
	# child fork which can only happen if the child fork runs gc.
	forkdir="$proj"
	# It is enough to copy objects just one level down and get_repo_list
	# takes a regular expression (which is automatically prefixed with '^')
	# so we can easily match forks exactly one level down from this project
	get_repo_list "$forkdir/[^/]*:" |
	while read -r fork; do
		# Ignore forks that do not exist or are symbolic links
		[ ! -L "$cfg_reporoot/$fork.git" -a -d "$cfg_reporoot/$fork.git" ] || \
			continue
		# Or do not have a non-zero length alternates file
		[ -s "$cfg_reporoot/$fork.git/objects/info/alternates" ] || \
			continue
		if [ -n "$lpacks" ]; then
			# Install the "loose objects" pack(s) into the fork
			[ -d "$cfg_reporoot/$fork.git/objects/pack" ] || (
				cd "$cfg_reporoot/$fork.git" && \
				mkdir -p objects/pack
			)
			# $lpacks is split on whitespace on purpose: one pack name per word
			for lpack in $lpacks; do
				ln -f objects/pack/"pack-$lpack.pack" objects/pack/"pack-$lpack.idx" \
					"$cfg_reporoot/$fork.git/objects/pack/" || :
			done
			if ! [ -e "$cfg_reporoot/$fork.git/.needsgc" ]; then
				# Trigger a mini gc in the fork if it now has too many packs
				packs="$(list_packs --quiet --count --exclude-no-idx "$cfg_reporoot/$fork.git/objects/pack" || :)"
				if [ -n "$packs" ] && [ "$packs" -ge 20 ]; then
					>"$cfg_reporoot/$fork.git/.needsgc"
				fi
			fi
			git --git-dir="$cfg_reporoot/$fork.git" update-server-info
		fi
		# Update the fork's lastparentgc date (must be current, not $gcstart)
		git --git-dir="$cfg_reporoot/$fork.git" config \
			gitweb.lastparentgc "$(date "$datefmt")"
	done
	if [ -n "$lpacks" ]; then
		# Remove the "loose objects" pack(s) from the parent
		for lpack in $lpacks; do
			rm -f objects/pack/"pack-$lpack.idx" objects/pack/"pack-$lpack.pack"
		done
	fi
fi

# ---- END DUPLICATED CODE SECTION TWO ----
}
187 # This part hard-links all loose objects into any children
# Hard-link every loose object of the project into each immediate child
# fork that has a non-empty alternates file, and refresh each such fork's
# gitweb.lastparentgc setting.
#
# Must be called with the project's repository as the current directory.
# Reads:  proj, cfg_reporoot, cfg_posix_sh_bin, octet, octet19, datefmt
propagate_objects() {
# ---- BEGIN DUPLICATED CODE SECTION THREE ----
# NOTE: differs from the listing in two ways that should be mirrored in
# gc.sh: the link destination is handed to the helper shell as an argument
# instead of being spliced into its script text, and the fork loop uses
# "read -r".

if has_forks "$proj"; then
	progress "~ [$proj] hard-linking loose objects into immediate child forks"
	# We have to update the lastparentgc time in the child forks even if they do not get any
	# new "loose objects" because they need to run gc just in case the parent now has some
	# objects that used to only be in the child so they can be removed from the child.
	# For example, a "patch" might be developed first in a fork and then later accepted into
	# the parent in which case the objects making up the patch in the child fork are now
	# redundant (since they're now in the parent as well) and need to be removed from the
	# child fork which can only happen if the child fork runs gc.
	shbin="${cfg_posix_sh_bin:-/bin/sh}"
	forkdir="$proj"
	# It is enough to copy objects just one level down and get_repo_list
	# takes a regular expression (which is automatically prefixed with '^')
	# so we can easily match forks exactly one level down from this project
	get_repo_list "$forkdir/[^/]*:" |
	while read -r fork; do
		# Ignore forks that do not exist or are symbolic links
		[ ! -L "$cfg_reporoot/$fork.git" -a -d "$cfg_reporoot/$fork.git" ] || \
			continue
		# Or do not have a non-zero length alternates file
		[ -s "$cfg_reporoot/$fork.git/objects/info/alternates" ] || \
			continue
		# Match objects in parent project
		for d in objects/$octet; do
			# An unmatched glob is left as the literal pattern; skip it
			[ "$d" != "objects/$octet" ] || continue
			mkdir -p "$cfg_reporoot/$fork.git/$d"
			# The destination directory travels as "$1" of the helper
			# shell, so quotes or other shell metacharacters in the path
			# cannot alter the command that runs.  The file names that
			# xargs appends follow it; nothing is linked when there are none.
			find "$d" -maxdepth 1 -type f -name "$octet19" -print0 |
			xargs -0 "$shbin" -c 'dest="$1"; shift; [ "$#" -eq 0 ] || ln -f "$@" "$dest"' \
				sh "$cfg_reporoot/$fork.git/$d/" || :
		done
		# Update the fork's lastparentgc date (must be current, not $gcstart)
		git --git-dir="$cfg_reporoot/$fork.git" config \
			gitweb.lastparentgc "$(date "$datefmt")"
	done
fi

# ---- END DUPLICATED CODE SECTION THREE ----
}
# Propagate loose objects into the child forks: as a single combined pack
# when --single-pack was given, otherwise as individual hard links.
if [ -n "$singlepack" ]; then
	propagate_single_pack
else
	propagate_objects
fi

# Everything succeeded: drop the failure trap before reporting success.
trap - EXIT
echo "loose objects for $proj have now been ${singlepack:+packed and }linked into child forks (if any)"