perform-pre-gc-linking.sh: match overhauled gc fork handling
[girocco.git] / toolbox / perform-pre-gc-linking.sh
blobdc95234d716dd4208e2276821afef947440f1735
1 #!/bin/sh
3 # Perform pre-gc linking of unreachable objects to forks
5 # It may, under unusual circumstances, be desirable to run git gc
6 # manually. However, running git gc on a project that has forks is
7 # dangerous as it can reap objects not in use by the project itself
8 # but which are still in use by one or more forks which do not have
9 # their own copy since they use an alternates file to refer to them.
11 # Note that a .nogc file should really be created during the manual
12 # gc operation!
14 # Running this script on a project BEFORE manually running git gc
15 # on that project prevents this problem from occurring PROVIDED the
16 # "git gc --prune=all" or "git gc --prune=now" or "git repack -a -d -l"
17 # options are NOT used. In other words NEVER prune objects immediately
18 # if the project has ANY forks at all!
20 # During normal gc.sh operations, what this script does is essentially
21 # what happens AFTER "git repack -A -d -l" but BEFORE "git prune". This
22 # script does, however, also touch all the .pack files to make sure nothing
23 # accidentally gets pruned early (just like gc.sh does).
25 # It is enough to run this script before a "git gc" rather than in the
26 # middle (i.e. after "git repack -A -d -l" but before "git prune") although
27 # that could result in forks referring to now-loosened objects in the parent.
28 # This is not a terrible thing and those objects will be packed up and linked
29 # into the forks before any future "git prune" so nothing will be lost, but
30 # the forks will not be quite as efficient as they could be with everything
31 # located in packs. The simple solution is to just run this script again
32 # after running "git gc". In other words do this:
34 # 1. Run this script on the project
35 # 2. Run "git gc" with any options AVOIDING any that cause immediate pruning
36 # 3. Run this script again on the project (optional but desirable)
38 # Note that running this script WILL make all the project's child forks eligible
39 # for gc at their next interval (i.e. they will not skip running gc even if it
40 # ends up not actually doing anything)
# Abort on the first unhandled command failure.
set -e

# Load the shared shell library.
# NOTE(review): @basedir@ looks like an install-time placeholder, and this
# library is presumably what provides the cfg_*/var_* settings and the helper
# functions used further down -- confirm against shlib.sh.
. @basedir@/shlib.sh

# Mask only the "other" write bit for files created from here on.
umask 002

# An optional leading --force skips the .nogc/.bypass safety check below.
force=
if [ "$1" = "--force" ]; then
	force=1
	shift
fi

# Exactly one project name is required; a trailing ".git" is stripped.
proj="${1%.git}"
if [ "$#" -ne 1 ] || [ -z "$proj" ]; then
	echo "I need a project name (e.g. \"$(basename "$0") example\")" >&2
	exit 1
fi

# Everything below uses paths relative to the project's repository directory.
if ! cd "$cfg_reporoot/$proj.git"; then
	echo "no such directory: $cfg_reporoot/$proj.git" >&2
	exit 1
fi
# Refuse to run while a gc.pid file naming both a pid and a host is present
# in the repository (the current directory).
apid=
ahost=
# A missing or unreadable gc.pid is not an error here: the read failure is
# silenced and ignored.  ajunk only absorbs any fields after the first two.
{ read -r apid ahost ajunk < gc.pid; } >/dev/null 2>&1 || :
if [ -n "$apid" ] && [ -n "$ahost" ]; then
	{
		echo "ERROR: refusing to run, $cfg_reporoot/$proj.git/gc.pid file exists"
		echo "ERROR: is gc already running on machine '$ahost' pid '$apid'?"
	} >&2
	exit 1
fi
# Unless --force was given, insist on a .nogc or .bypass marker file in the
# repository; per the warning text, without one jobd.pl could run gc.sh while
# the manual work is in progress.
# Two separate tests replace the obsolescent "[ ... -o ... ]" form.
if [ -z "$force" ] && ! [ -e .nogc ] && ! [ -e .bypass ]; then
	{
		echo "WARNING: no .nogc or .bypass file found in $cfg_reporoot/$proj.git"
		echo "WARNING: jobd.pl could run gc.sh while you're fussing with $proj"
		echo "WARNING: either create one of those files or re-run with --force"
		echo "WARNING: (e.g. \"$(basename "$0") --force $proj\") to bypass this warning"
		echo "WARNING: please remember to remove the file after you're done fussing"
	} >&2
	exit 1
fi
# date -R is linux-only, POSIX equivalent is '+%a, %d %b %Y %T %z'
datefmt='+%a, %d %b %Y %T %z'

# make sure combine-packs uses the correct Git executable
#
# Runs combine-packs.sh with $var_git_exec_path and $cfg_basedir/bin placed
# ahead of the inherited PATH, for that one command only.
# Globals:   var_git_exec_path, cfg_basedir (read)
# Arguments: passed through unchanged to combine-packs.sh
# Returns:   the exit status of combine-packs.sh
run_combine_packs() {
	PATH="$var_git_exec_path:$cfg_basedir/bin:$PATH" @basedir@/jobd/combine-packs.sh "$@"
}
# Until this trap is cleared, any exit from the script reports a failure on
# stderr and forces exit status 1.
trap 'echo "pre-packing-and-linking failed" >&2; exit 1' EXIT

# The following is taken verbatim from gc.sh (with some whitespace adjustment
# and comment removal and ">.gc_failed" commented out) and should be kept in
# sync with it
# This part touches any packs to make sure loosened objects avoid immediate pruning
#
# Pass 1: every pack name printed by list_packs is handed to "touch -c"
# (-c never creates a file); failures are ignored.
# Pass 2: one more pack is selected -- the first one reported with
# --exclude-no-bitmap, otherwise the result of the fallback list_packs query --
# and, if it is a non-empty regular file, it is touched again after a
# one second pause, so its timestamp is later than those set in pass 1.
# NOTE(review): list_packs is not defined in this file (presumably it comes
# from shlib.sh); confirm the exact meaning of its options there.
# The statements between the markers are intentionally left as in gc.sh.

# ---- BEGIN DUPLICATED CODE SECTION ONE ----

list_packs --exclude-no-idx objects/pack | xargs touch -c 2>/dev/null || :
bmpack="$(list_packs --exclude-no-bitmap --exclude-no-idx --max-matches 1 objects/pack)"
[ -n "$bmpack" ] || bmpack="$(list_packs --exclude-no-idx --max-matches 1 --object-limit -1 --include-boundary objects/pack)"
if [ -n "$bmpack" ] && [ -f "$bmpack" -a -s "$bmpack" ]; then
	sleep 1
	touch -c "$bmpack" 2>/dev/null || :
fi

# ---- END DUPLICATED CODE SECTION ONE ----
# This part creates a pack of all loose objects and hard-links it into any children
#
# Outline (only runs when has_forks succeeds for the project):
#   1. The loose object files found under objects/ are turned into object
#      names and fed to run_combine_packs; the pack names it prints are kept
#      in $lpacks.  A failure of that pipeline aborts the script.
#   2. For every fork exactly one level below the project that is a real
#      directory (not a symlink) and has a non-empty alternates file:
#        - if $lpacks is non-empty, each new pack (.pack and .idx) is
#          hard-linked into the fork, a .needsgc marker is created when the
#          fork now has 20 or more packs, and update-server-info is run;
#        - gitweb.lastparentgc is always set to the current date.
#   3. If $lpacks is non-empty, the new packs are removed from the parent.
# Uses $datefmt and run_combine_packs from earlier in this script.
# NOTE(review): has_forks, progress, get_repo_list, list_packs, $octet,
# $octet19, $packopts and $quiet are not defined in this file -- presumably
# they come from shlib.sh (variables that are unset simply expand to
# nothing); confirm there.
# The statements between the markers are intentionally left as in gc.sh.

# ---- BEGIN DUPLICATED CODE SECTION TWO ----

if has_forks "$proj"; then
	# Pack up all the loose objects and copy (actually hard link) them into all the forks
	progress "~ [$proj] creating pack of loose objects for forks"
	lpacks="$(find objects/$octet -maxdepth 1 -type f -name "$octet19" -print 2>/dev/null |
		LC_ALL=C awk -F / '{print $2 $3}' |
		run_combine_packs --objects --names $packopts --incremental --all-progress-implied $quiet --non-empty)" || {
		#>.gc_failed
		exit 1
	}
	# We have to update the lastparentgc time in the child forks even if they do not get any
	# new "loose objects" pack(s) because they need to run gc just in case the parent now has
	# some objects that used to only be in the child so they can be removed from the child.
	# For example, a "patch" might be developed first in a fork and then later accepted into
	# the parent in which case the objects making up the patch in the child fork are now
	# redundant (since they're now in the parent as well) and need to be removed from the
	# child fork which can only happen if the child fork runs gc.
	forkdir="$proj"
	# It is enough to copy objects just one level down and get_repo_list
	# takes a regular expression (which is automatically prefixed with '^')
	# so we can easily match forks exactly one level down from this project
	get_repo_list "$forkdir/[^/]*:" |
	while read fork; do
		# Ignore forks that do not exist or are symbolic links
		[ ! -L "$cfg_reporoot/$fork.git" -a -d "$cfg_reporoot/$fork.git" ] || \
			continue
		# Or do not have a non-zero length alternates file
		[ -s "$cfg_reporoot/$fork.git/objects/info/alternates" ] || \
			continue
		if [ -n "$lpacks" ]; then
			# Install the "loose objects" pack(s) into the fork
			[ -d "$cfg_reporoot/$fork.git/objects/pack" ] || (
				cd "$cfg_reporoot/$fork.git" && \
				mkdir -p objects/pack
			)
			for lpack in $lpacks; do
				ln -f objects/pack/"pack-$lpack.pack" objects/pack/"pack-$lpack.idx" \
					"$cfg_reporoot/$fork.git/objects/pack/" || :
			done
			if ! [ -e "$cfg_reporoot/$fork.git/.needsgc" ]; then
				# Trigger a mini gc in the fork if it now has too many packs
				packs="$(list_packs --quiet --count --exclude-no-idx "$cfg_reporoot/$fork.git/objects/pack" || :)"
				if [ -n "$packs" ] && [ "$packs" -ge 20 ]; then
					>"$cfg_reporoot/$fork.git/.needsgc"
				fi
			fi
			git --git-dir="$cfg_reporoot/$fork.git" update-server-info
		fi
		# Update the fork's lastparentgc date (must be current, not $gcstart)
		git --git-dir="$cfg_reporoot/$fork.git" config \
			gitweb.lastparentgc "$(date "$datefmt")"
	done
	if [ -n "$lpacks" ]; then
		# Remove the "loose objects" pack(s) from the parent
		for lpack in $lpacks; do
			rm -f objects/pack/"pack-$lpack.idx" objects/pack/"pack-$lpack.pack"
		done
	fi
fi

# ---- END DUPLICATED CODE SECTION TWO ----
# Everything succeeded: clear the EXIT trap so the script can finish with
# status 0, then report completion.
trap - EXIT
echo "loose objects for $proj have now been packed and linked into child forks (if any)"