gc.sh: always generate optimized packs
[girocco.git] / hooks / pre-receive
blob2065c0f18a47153f8c3683a477fca14d30955e52
#!/bin/sh

# Keep track of the last time we modified the object store.
# Beware, we MAY be running in a chroot!

# Any unhandled command failure aborts the hook with a non-zero status.
set -e

# Extra option handed to every xargs invocation by the xargs wrapper function.
# The @...@ token looks like an install-time placeholder -- presumably it is
# replaced with either nothing or the platform's "no-run-if-empty" option.
var_xargs_r=@var_xargs_r@

# Files and directories created by this hook must stay group writable.
umask 002
# The presence of an executable perl is used as the "not in a chroot" probe.
if [ -x @perlbin@ ]; then
	# We are NOT inside the chroot
	basedir=@basedir@
	# Run the list_packs utility from the installation's bin directory.
	# When this wrapper is not defined, list_packs is presumably resolved
	# through PATH instead -- TODO confirm for the chroot case.
	list_packs() { command "$basedir/bin/list_packs" "$@"; }
fi
# Some platforms' broken xargs runs the command always at least once even if
# there's no input unless given a special option.  Automatically supply the
# option on those platforms by providing an xargs function.
#
# Arguments: passed through unchanged to the real xargs.
# Globals:   var_xargs_r (read) -- the extra option, possibly empty.
#
# $var_xargs_r is intentionally left unquoted so that an empty value
# disappears instead of becoming an empty argument.
# shellcheck disable=SC2086
xargs() { command xargs $var_xargs_r "$@"; }
# Record the time of this push in the repository's config as
# gitweb.lastreceive.  %a and %b are locale dependent, so force the C locale
# to keep the day and month names in English no matter what locale the
# pushing session happens to carry.
git config gitweb.lastreceive "$(LC_ALL=C date '+%a, %d %b %Y %T %z')"
# Read the incoming refs and freshen old loose objects.
# If we waited until post-receive a gc could have already nuked them.
# We freshen the new ref in case it's being resurrected to protect it from gc.
# We probably do not need to do it for new refs as Git tries to do that,
# but since we're already doing it for old refs (which Git does not do),
# it's almost no extra work for new refs, just in case.  We also attempt
# to make all packs user and group writable so they can be touched later.

# We also record changes to a ref log.  We do it here rather than in
# post-receive so that we can guarantee all potential changes are recorded in
# the log before they take place.  It's possible that the update hook will
# ultimately deny one or more updates but waiting until post-receive could
# result in updates being left out of the log.

# Glob fragments: one octet is two lowercase hex digits, so octet20 matches
# the 40 hex digit hash embedded in a pack file name.
octet='[0-9a-f][0-9a-f]'
octet4="$octet$octet$octet$octet"
octet20="$octet4$octet4$octet4$octet4$octet4"

# Best effort only: every diagnostic is discarded and a failure is ignored so
# that a pack we cannot chmod never blocks the push.
{
	find objects/pack -maxdepth 1 -type f \! -perm -ug+w \
		-name "pack-$octet20.pack" -print0 |
	xargs -0 chmod ug+w
} 2>/dev/null || :
# Trigger a mini-gc if there are at least 20 packs present.
# Our current pack that contains this push's data will have a .keep while
# this hook is running so we do not use --exclude-keep here under the
# assumption that by the time a mini-gc starts the .keep will have been
# removed.  If not, that's okay too, it just means the current pack will
# not take part in the mini-gc and will have to wait for the next one.
# The mini-gc code contains the logic to sort out small packs vs. non-small
# packs and which should be combined in what order so we do not need to
# do any more complicated testing here.
if ! [ -e .needsgc ]; then
	packs=
	# A failing or missing list_packs simply leaves the count empty.
	{ packs="$(list_packs --quiet --count --exclude-no-idx objects/pack || :)" || :; } 2>/dev/null
	# stderr of the numeric test is discarded so that an unexpected,
	# non-numeric count cannot leak a shell diagnostic to the pusher.
	if [ -n "$packs" ] && [ "$packs" -ge 20 ] 2>/dev/null; then
		: >.needsgc
	fi
fi
# Make sure we have a reflogs directory and abort the update if we cannot
# create one.  Normally project creation will make sure this directory exists.
# The mkdir result is deliberately ignored; the bare test on the following
# line is what aborts the hook (via set -e) when the directory is missing.
[ -d reflogs ] || mkdir -p reflogs >/dev/null 2>&1 || :
[ -d reflogs ]
# Multiple push operations could be occurring simultaneously so we need to
# guarantee they do not step on one another and we do this by generating a
# unique log file name.  We use a seconds timestamp and the current process
# id and we guarantee that this process is kept alive for the entire second
# of the timestamp thereby guaranteeing that we are the only possible process
# that could use that pid during that particular second (ignoring leap seconds).
# To do this we need to sleep until the second turns over, grab the timestamp
# and then sleep until the second turns over again.  This will introduce a
# guaranteed 2 second delay into every push.  This should not generally be
# noticeable and does provide a limited throttle on excessive push DOS attacks.
# We always use UTC for the timestamp so that chroot and non-chroot match up.
# Log entries are the lines sent to the pre-receive hook with hhmmss prepended.
sleep 1
lognamets="$(TZ=UTC date '+%Y%m%d_%H%M%S')"
sleep 1
# Everything after the underscore, i.e. the HHMMSS part of the timestamp.
loghhmmss="${lognamets##*_}"

# We write to a temp ref log and then move it into place so that the reflogs
# collector can assume that log files with their final name are immutable.
logname="reflogs/$lognamets.$$"
lognametmp="reflogs/tmp_$lognamets.$$"
# Consume the "<old> <new> <ref>" lines supplied on stdin.  Each line is
# appended, prefixed with the hhmmss stamp, to the temporary ref log (open on
# file descriptor 3 for the duration of the loop) and the loose object files
# for the old and new values are made writable and touched, best effort.
nullsha="0000000000000000000000000000000000000000"
while read -r old new ref; do
	# printf writes the line verbatim; echo may mangle backslashes
	printf '%s\n' "$loghhmmss $old $new $ref" >&3
	oldp=
	newp=
	if [ "$old" != "$nullsha" ]; then
		# freshen mod time on recently unref'd loose objects
		fn="${old#??}"
		# the quoted pattern strips $fn as a literal suffix, leaving
		# the leading two characters as the shard directory
		shard="${old%"$fn"}"
		oldp="objects/$shard/$fn"
	fi
	if [ "$new" != "$nullsha" ]; then
		# prevent imminent pruning of a ref being resurrected
		fn="${new#??}"
		shard="${new%"$fn"}"
		newp="objects/$shard/$fn"
	fi
	# ${var:+"$var"} drops an empty path entirely while protecting a
	# non-empty one from word splitting and pathname expansion.
	chmod ug+w ${oldp:+"$oldp"} ${newp:+"$newp"} 2>/dev/null || :
	touch -c ${oldp:+"$oldp"} ${newp:+"$newp"} 2>/dev/null || :
done 3>"$lognametmp"
# Publish the finished log under its final name.
mv "$lognametmp" "$logname"
# While unlikely, it is conceivable that several ref updates have occurred that
# did not actually push any packs.  In that case we could build up a large
# number of log files so request a mini gc if there are 50 or more of them now.
if ! [ -e .needsgc ]; then
	logfiles=
	# The "+0" arithmetic normalizes the wc output (which may carry
	# padding) into a plain integer and turns an empty result into 0.
	{ logfiles="$(($(find reflogs -maxdepth 1 -type f -print | wc -l || :)+0))" || :; } 2>/dev/null
	if [ -n "$logfiles" ] && [ "$logfiles" -ge 50 ]; then
		: >.needsgc
	fi
fi