scripts: purge use of test '-a' and '-o' ops and clean up
[girocco.git] / toolbox / perform-pre-gc-linking.sh
blob199d7a312da5ecff3355b9450c207780ee7857e6
1 #!/bin/sh
3 # Perform pre-gc linking of unreachable objects to forks
5 # It may, under unusual circumstances, be desirable to run git gc
6 # manually. However, running git gc on a project that has forks is
7 # dangerous as it can reap objects not in use by the project itself
8 # but which are still in use by one or more forks which do not have
9 # their own copy since they use an alternates file to refer to them.
11 # Note that a .nogc file should really be created during the manual
12 # gc operation!
# Running this script on a project BEFORE manually running git gc
# on that project prevents this problem from occurring PROVIDED the
# "git gc --prune=all" or "git gc --prune=now" or "git repack -a -d -l"
# options are NOT used. In other words NEVER prune objects immediately
# if the project has ANY forks at all!
20 # During normal gc.sh operations, what this script does is essentially
21 # what happens AFTER "git repack -A -d -l" but BEFORE "git prune". This
22 # script does, however, also touch all the .pack files to make sure nothing
23 # accidentally gets pruned early (just like gc.sh does).
25 # It is enough to run this script before a "git gc" rather than in the
26 # middle (i.e. after "git repack -A -d -l" but before "git prune") although
27 # that could result in forks referring to now-loosened objects in the parent.
28 # This is not a terrible thing and those objects will be propagated into the
29 # forks before any future "git prune" so nothing will be lost.
31 # However, having any child forks depend on loose objects only available via
32 # their alternates as unreachable loose objects in those alternates can easily
33 # be avoided by simply running this script again after running "git gc". In
34 # other words do this:
# 1. Run this script on the project
# 2. Run "git gc" with any options AVOIDING any that cause immediate pruning
# 3. Run this script again on the project (optional but desirable)
40 # Note that running this script WILL make all the project's child forks eligible
41 # for gc at their next interval (i.e. they will not skip running gc even if it
42 # ends up not actually doing anything).
44 # Loose objects are normally just hard-linked into the child forks, but if the
45 # "--single-pack" option is used they will instead be combined into a single
46 # pack and that will be hard-linked into the child forks instead.
47 # With the --include-packs option packs will also be hard-linked into the
48 # forks (the old behavior) -- useful before extreme modification of the parent.
# Stop at the first command that fails without being handled.
set -e

# Load the shared helper library.
# NOTE(review): @basedir@ looks like a placeholder substituted at install
# time -- confirm against the build/install machinery.
. @basedir@/shlib.sh

umask 002

# Option flags; each one stays empty unless its option is given.
force=
singlepack=
packstoo=

# Consume leading options.  Parsing ends at "--" (which is itself consumed)
# or at the first argument that is not of the form "-x...".
while [ "$#" -gt 0 ]; do
	case "$1" in
	-h|--help)
		cat <<EOT
Usage: $(basename "$0") [option ...] <project-name>
--force Run even though no .nogc or .bypass file present
--single-pack Hard-link a pack of loose objects down to forks
--include-packs Hard-link packs down to forks as well as loose objects
<project-name> Name of project (e.g. "git" or "git/fork" etc.)
The --single-pack and --include-packs options are currently incompatible.
EOT
		exit 0
		;;
	--force)
		force=1
		;;
	--single-pack)
		singlepack=1
		;;
	--include-packs)
		packstoo=1
		;;
	--)
		shift
		break
		;;
	-?*)
		echo "Unknown option: $1" >&2
		exit 1
		;;
	*)
		# First non-option argument (or an empty one): stop parsing
		break
		;;
	esac
	shift
done
# ---- Preflight checks ----
# Every failure below prints its diagnostics on stderr (matching the
# "Unknown option" message and the EXIT trap) and exits with status 1.

# The two propagation modes cannot be combined.
if [ -n "$singlepack" ] && [ -n "$packstoo" ]; then
	echo "Currently --include-packs and --single-pack are incompatible." >&2
	exit 1
fi

# Exactly one project name is required; a trailing ".git" is stripped.
proj="${1%.git}"
if [ "$#" -ne 1 ] || [ -z "$proj" ]; then
	{
		echo "I need a project name (e.g. \"$(basename "$0") example\")"
		echo "(See also help -- \"$(basename "$0") --help\")"
	} >&2
	exit 1
fi

# All later relative paths (objects/..., gc.pid, .nogc) are resolved from
# inside the project's repository directory.
if ! cd "$cfg_reporoot/$proj.git"; then
	echo "no such directory: $cfg_reporoot/$proj.git" >&2
	exit 1
fi

# Refuse to run when gc.pid supplies both a pid and a host name.  A missing
# or unreadable gc.pid is deliberately ignored (best effort read).
apid=
ahost=
{ read -r apid ahost ajunk <gc.pid; } >/dev/null 2>&1 || :
if [ -n "$apid" ] && [ -n "$ahost" ]; then
	{
		echo "ERROR: refusing to run, $cfg_reporoot/$proj.git/gc.pid file exists"
		echo "ERROR: is gc already running on machine '$ahost' pid '$apid'?"
	} >&2
	exit 1
fi

# Without --force, insist on a .nogc or .bypass marker file being present.
# The suggested re-run command repeats whichever mode option was given.
if [ -z "$force" ] && ! [ -e .nogc ] && ! [ -e .bypass ]; then
	{
		echo "WARNING: no .nogc or .bypass file found in $cfg_reporoot/$proj.git"
		echo "WARNING: jobd.pl could run gc.sh while you're fussing with $proj"
		echo "WARNING: either create one of those files or re-run with --force"
		echo "WARNING: (e.g. \"$(basename "$0") --force ${singlepack:+--single-pack }${packstoo:+--include-packs }$proj\") to bypass this warning"
		echo "WARNING: please remember to remove the file after you're done fussing"
	} >&2
	exit 1
fi
# "date -R" is linux-only, so spell out the equivalent POSIX format string
datefmt='+%a, %d %b %Y %T %z'

# Run combine-packs.sh with the configured Git exec path and $cfg_basedir/bin
# placed at the front of PATH (for that one command only) so that it picks up
# the correct Git executable.  All arguments are passed through unchanged.
run_combine_packs() {
	PATH="$var_git_exec_path:$cfg_basedir/bin:$PATH" \
		@basedir@/jobd/combine-packs.sh "$@"
}

# While this trap is installed, any exit is reported as a failure (status 1)
trap 'echo "pre-packing-and-linking failed" >&2; exit 1' EXIT
127 # The following is taken verbatim from gc.sh (with some whitespace adjustment
128 # and comment removal and ">.gc_failed" commented out and .pack linking added)
129 # and should be kept in sync with it
132 # This part touches any packs to make sure loosened objects avoid immediate pruning
134 # ---- BEGIN DUPLICATED CODE SECTION ONE ----
# Refresh the timestamp of every pack that has an index (best effort only)
list_packs --exclude-no-idx objects/pack | xargs touch -c 2>/dev/null || :

# Select one pack to touch again a second later: the first pack that has both
# a bitmap and an index or, failing that, the single match of the fallback query
bmpack="$(list_packs --exclude-no-bitmap --exclude-no-idx --max-matches 1 objects/pack)"
if [ -z "$bmpack" ]; then
	bmpack="$(list_packs --exclude-no-idx --max-matches 1 --object-limit -1 --include-boundary objects/pack)"
fi
if [ -n "$bmpack" ] && [ -f "$bmpack" ] && [ -s "$bmpack" ]; then
	# The pause makes the selected pack's timestamp later than the others
	sleep 1
	touch -c "$bmpack" 2>/dev/null || :
fi
144 # ---- END DUPLICATED CODE SECTION ONE ----
# This part creates a pack of all loose objects and hard-links it into any children
# This logic is no longer used by default but may be selected with the "--single-pack" option
# Combine the project's loose objects into pack(s), hard-link those pack(s)
# into each immediate child fork and then delete them from the project.
#
# Reads:  proj cfg_reporoot octet octet19 packopts quiet datefmt
# Sets:   lpacks forkdir (fork, lpack and packs are only set inside the
#         pipeline's while loop)
# Must be run with the project's repository as the current directory.
# Exits the script with status 1 if the pack cannot be created.
propagate_single_pack() {
# ---- BEGIN DUPLICATED CODE SECTION TWO ----

	# A project without forks has nothing to hand down
	has_forks "$proj" || return 0

	# Collect all the loose objects into pack(s) that can then be copied
	# (actually hard-linked) into every fork
	progress "~ [$proj] creating pack of loose objects for forks"
	if ! lpacks="$(
		find objects/$octet -maxdepth 1 -type f -name "$octet19" -print 2>/dev/null |
		LC_ALL=C awk -F / '{print $2 $3}' |
		run_combine_packs --objects --names $packopts --incremental --all-progress-implied $quiet --non-empty
	)"; then
		#>.gc_failed
		exit 1
	fi

	# Every child fork gets a fresh lastparentgc time whether or not it
	# receives any new "loose objects" pack(s).  The fork must run gc
	# because the parent may now contain objects that previously lived
	# only in the fork.  A "patch" developed in a fork and later accepted
	# into the parent is the typical case: the fork's copies of those
	# objects are now redundant and only a gc in the fork removes them.
	forkdir="$proj"

	# Copying one level down is sufficient.  get_repo_list takes a regular
	# expression (automatically prefixed with '^') so forks exactly one
	# level below this project are easy to match
	get_repo_list "$forkdir/[^/]*:" |
	while read fork; do
		# Skip forks that are symbolic links or are not directories
		if [ -L "$cfg_reporoot/$fork.git" ] || ! [ -d "$cfg_reporoot/$fork.git" ]; then
			continue
		fi
		# Skip forks lacking a non-empty alternates file
		if ! [ -s "$cfg_reporoot/$fork.git/objects/info/alternates" ]; then
			continue
		fi
		if [ -n "$lpacks" ]; then
			# Put the "loose objects" pack(s) into the fork
			if ! [ -d "$cfg_reporoot/$fork.git/objects/pack" ]; then
				(
					cd "$cfg_reporoot/$fork.git" &&
					mkdir -p objects/pack
				)
			fi
			for lpack in $lpacks; do
				ln -f objects/pack/"pack-$lpack.pack" objects/pack/"pack-$lpack.idx" \
					"$cfg_reporoot/$fork.git/objects/pack/" || :
			done
			if ! [ -e "$cfg_reporoot/$fork.git/.needsgc" ]; then
				# Request a mini gc in the fork once it holds too many packs
				packs="$(list_packs --quiet --count --exclude-no-idx "$cfg_reporoot/$fork.git/objects/pack")" || :
				if [ -n "$packs" ] && [ "$packs" -ge 20 ]; then
					>"$cfg_reporoot/$fork.git/.needsgc"
				fi
			fi
			git --git-dir="$cfg_reporoot/$fork.git" update-server-info
		fi
		# Stamp the fork's lastparentgc date (must be current, not $gcstart)
		git --git-dir="$cfg_reporoot/$fork.git" config \
			gitweb.lastparentgc "$(date "$datefmt")"
	done

	if [ -n "$lpacks" ]; then
		# Drop the "loose objects" pack(s) from the parent again
		for lpack in $lpacks; do
			rm -f objects/pack/"pack-$lpack.idx" objects/pack/"pack-$lpack.pack"
		done
	fi

# ---- END DUPLICATED CODE SECTION TWO ----
}
216 # This part hard-links all loose objects into any children
# Hard-link the project's loose objects (and, when --include-packs was given,
# its packs and their indexes too) into each immediate child fork.
#
# Reads:  proj packstoo cfg_reporoot octet octet19 var_sh_bin datefmt
# Sets:   forkdir (fork, d and packs are only set inside the pipeline's
#         while loop)
# Must be run with the project's repository as the current directory.
# Does nothing when has_forks reports that the project has no forks.
propagate_objects() {
# ---- BEGIN DUPLICATED CODE SECTION THREE ----

if has_forks "$proj"; then
	# The optional " and packs" carries its own leading space so the message
	# reads correctly both with and without --include-packs
	progress "~ [$proj] hard-linking loose objects${packstoo:+ and packs} into immediate child forks"
	# We have to update the lastparentgc time in the child forks even if they do not get any
	# new "loose objects" because they need to run gc just in case the parent now has some
	# objects that used to only be in the child so they can be removed from the child.
	# For example, a "patch" might be developed first in a fork and then later accepted into
	# the parent in which case the objects making up the patch in the child fork are now
	# redundant (since they're now in the parent as well) and need to be removed from the
	# child fork which can only happen if the child fork runs gc.
	forkdir="$proj"
	# It is enough to copy objects just one level down and get_repo_list
	# takes a regular expression (which is automatically prefixed with '^')
	# so we can easily match forks exactly one level down from this project
	get_repo_list "$forkdir/[^/]*:" |
	while read fork; do
		# Ignore forks that do not exist or are symbolic links
		! [ -L "$cfg_reporoot/$fork.git" ] && [ -d "$cfg_reporoot/$fork.git" ] ||
			continue
		# Or do not have a non-zero length alternates file
		[ -s "$cfg_reporoot/$fork.git/objects/info/alternates" ] ||
			continue
		# Match objects in parent project
		for d in objects/$octet; do
			# An unmatched glob is left as the literal pattern; skip it
			[ "$d" != "objects/$octet" ] || continue
			mkdir -p "$cfg_reporoot/$fork.git/$d"
			# The destination directory is spliced into the -c script text
			# so that xargs only has to append the source file names
			find "$d" -maxdepth 1 -type f -name "$octet19" -print0 |
			xargs -0 "$var_sh_bin" -c 'ln -f "$@" '"'$cfg_reporoot/$fork.git/$d/'" sh || :
		done
		# Match packs in parent project if --include-packs given
		if [ -n "$packstoo" ]; then
			mkdir -p "$cfg_reporoot/$fork.git/objects/pack"
			# sed prints each .pack name followed by its matching .idx name
			list_packs --all --exclude-no-idx objects/pack | LC_ALL=C sed 'p;s/\.pack$/.idx/' |
			xargs "$var_sh_bin" -c 'ln -f "$@" '"'$cfg_reporoot/$fork.git/objects/pack/'" sh || :
			if ! [ -e "$cfg_reporoot/$fork.git/.needsgc" ]; then
				# Trigger a mini gc in the fork if it now has too many packs
				packs="$(list_packs --quiet --count --exclude-no-idx "$cfg_reporoot/$fork.git/objects/pack")" || :
				if [ -n "$packs" ] && [ "$packs" -ge 20 ]; then
					>"$cfg_reporoot/$fork.git/.needsgc"
				fi
			fi
			git --git-dir="$cfg_reporoot/$fork.git" update-server-info
		fi
		# Update the fork's lastparentgc date (must be current, not $gcstart)
		git --git-dir="$cfg_reporoot/$fork.git" config \
			gitweb.lastparentgc "$(date "$datefmt")"
	done
fi

# ---- END DUPLICATED CODE SECTION THREE ----
}
# Run the propagation mode selected on the command line: a single combined
# pack when --single-pack was given, individual loose objects otherwise
case "$singlepack" in
'')
	propagate_objects
	;;
*)
	propagate_single_pack
	;;
esac

# Everything succeeded, so remove the failure-reporting EXIT trap
trap - EXIT
echo "loose objects ${packstoo:+and packs }for $proj have now been ${singlepack:+packed and }linked into child forks (if any)"