#!/bin/sh

# toolbox/perform-pre-gc-linking.sh
#
# Perform pre-gc linking of packs & objects to forks
#
# Usage: perform-pre-gc-linking.sh [--force] <project>
#
#   <project>  project name relative to $cfg_reporoot; a trailing ".git"
#              is accepted and stripped
#   --force    skip the check for a .nogc or .bypass file
#
# Exits 1 (with a message on stderr) on a usage error, a missing project
# directory, an existing gc.pid file, or a missing .nogc/.bypass file
# when --force was not given.

# It may, under unusual circumstances, be desirable to run git gc
# manually.  However, running git gc on a project that has forks is
# dangerous as it can reap objects not in use by the project itself
# but are still in use by one or more forks which do not have their
# own copy since they use an alternates file to refer to them.
#
# Note that a .nogc file should really be created during the manual
# gc operation!
#
# Running this script on a project BEFORE manually running git gc
# on that project prevents this problem from occurring.
#
# Note that there is a race condition (both with this script and
# with Girocco's gc.sh) between the time the objects are linked
# into forks and the time the repack occurs.  The following
# scenario could cause object loss:
#
#   1) The linking of objects and packs to forks starts
#   2) New objects are pushed to the project AND the linking step
#      has already progressed far enough that it does not see them
#      and they do not get linked into any forks.
#   3) At least one fork gets an updated ref that refers to the
#      new objects just pushed to the parent project that were
#      missed by the linking step.
#   4) New refs are pushed to the project that cause the objects
#      pushed in step (2) and referenced in step (3) to no longer
#      be referenced by the project itself.  This step must
#      complete BEFORE the git repack step runs and reads all
#      the project's current ref values.
#
# The window of opportunity here for data loss is extraordinarily
# tiny, but it is a non-zero window that could potentially be
# expanded by highly unusual system performance issues.

set -e

# NOTE(review): shlib.sh presumably supplies $cfg_reporoot and
# get_repo_list, both of which are used below but not defined here.
. @basedir@/shlib.sh

umask 002

# Optional leading --force flag
force=
if [ "$1" = "--force" ]; then
	force=1
	shift
fi

# Exactly one project name is required; tolerate a trailing ".git"
proj="${1%.git}"
if [ "$#" -ne 1 ] || [ -z "$proj" ]; then
	echo "I need a project name (e.g. \"$(basename "$0") example\")" >&2
	exit 1
fi
# Everything below uses paths relative to the project's repository
if ! cd "$cfg_reporoot/$proj.git"; then
	echo "no such directory: $cfg_reporoot/$proj.git" >&2
	exit 1
fi

# Refuse to run when gc.pid holds at least two fields ("<pid> <host> ...").
# A missing or unreadable file is not an error: the read failure is
# silenced and both variables simply stay empty.
apid=
ahost=
{ read -r apid ahost ajunk < gc.pid; } >/dev/null 2>&1 || :
if [ -n "$apid" ] && [ -n "$ahost" ]; then
	{
		echo "ERROR: refusing to run, $cfg_reporoot/$proj.git/gc.pid file exists"
		echo "ERROR: is gc already running on machine '$ahost' pid '$apid'?"
	} >&2
	exit 1
fi

# Unless forced, insist on a .nogc or .bypass marker file being present.
# Two separate tests are used because the -o operator of test(1) is
# marked obsolescent by POSIX.
if [ -z "$force" ] && ! [ -e .nogc ] && ! [ -e .bypass ]; then
	{
		echo "WARNING: no .nogc or .bypass file found in $cfg_reporoot/$proj.git"
		echo "WARNING: jobd.pl could run gc.sh while you're fussing with $proj"
		echo "WARNING: either create one of those files or re-run with --force"
		echo "WARNING: (e.g. \"$(basename "$0") --force $proj\") to bypass this warning"
		echo "WARNING: please remember to remove the file after you're done fussing"
	} >&2
	exit 1
fi

# date -R is linux-only, POSIX equivalent is '+%a, %d %b %Y %T %z'
datefmt='+%a, %d %b %Y %T %z'

# The following is taken verbatim from gc.sh and should be kept in sync with it
# (for that reason its statements are intentionally left exactly as they are)

# --- BEGIN DUPLICATED CODE ----

# safe pruning: we put all our objects in all forks, then we can
# safely get rid of extra ones; repacks in forks will get rid of
# the redundant ones again then; we carefully grab only loose
# objects and pack .idx and .pack files
forkdir="$proj"
if [ -d "../${forkdir##*/}" ]; then
	# It is enough to copy objects just one level down and get_repo_list
	# takes a regular expression (which is automatically prefixed with '^')
	# so we can easily match forks exactly one level down from this project
	get_repo_list "$forkdir/[^/]*:" |
	while read fork; do
		# Ignore forks that do not exist or are symbolic links
		[ ! -L "$cfg_reporoot/$fork.git" -a -d "$cfg_reporoot/$fork.git" ] || \
			continue
		# Or do not have a non-zero length alternates file
		[ -s "$cfg_reporoot/$fork.git/objects/info/alternates" ] || \
			continue
		# Match objects in parent project
		for d in objects/??; do
			[ "$d" != "objects/??" ] || continue
			mkdir -p "$cfg_reporoot/$fork.git/$d"
			ln -f "$d"/* "$cfg_reporoot/$fork.git/$d" || :
		done
		# Match packs in parent project
		mkdir -p "$cfg_reporoot/$fork.git/objects/pack"
		if [ "$(echo objects/pack/pack-*.idx)" != \
		     "objects/pack/pack-*.idx" ]; then
			ln -f objects/pack/pack-*.pack "$cfg_reporoot/$fork.git/objects/pack" || :
			ln -f objects/pack/pack-*.idx "$cfg_reporoot/$fork.git/objects/pack" || :
		fi
		# Update the fork's lastparentgc date (must be current, not $gcstart)
		GIT_DIR="$cfg_reporoot/$fork.git" git config \
			gitweb.lastparentgc "$(date "$datefmt")"
	done
fi

# --- END DUPLICATED CODE ---

echo "objects and packs for $proj have now been linked into forks (if any)"