scripts: eliminate use of find -print0 | xargs -0
[girocco.git] / hooks / pre-receive
blob3ba94c379475d61d55bf1434fc2ccc79d2e9a414
1 #!/bin/sh
3 # Keep track of the last time we modified the object store
5 # Beware, we MAY be running in a chroot!
# Abort the hook at the first command failure that is not explicitly handled.
7 set -e
# Mask out only the "other" write bit so files created here stay group writable.
9 umask 002
# NOTE(review): @perlbin@ and @basedir@ look like install-time placeholders --
# confirm against the install script.  When the perl binary is executable we
# are outside the chroot and list_packs is wrapped to run the helper that
# lives under $basedir/bin, passing all arguments through unchanged.
11 if [ -x @perlbin@ ]; then
12 # We are NOT inside the chroot
13 basedir=@basedir@
14 list_packs() { command "$basedir/bin/list_packs" "$@"; }
# Store the current local time (format like "Mon, 02 Jan 2006 15:04:05 -0700")
# in this repository's config under gitweb.lastreceive.
17 git config gitweb.lastreceive "$(date '+%a, %d %b %Y %T %z')"
19 # Read the incoming refs and freshen old loose objects
20 # If we waited until post-receive a gc could have already nuked them
21 # We freshen the new ref in case it's being resurrected to protect it from gc
22 # We probably do not need to do it for new refs as Git tries to do that,
23 # but since we're already doing it for old refs (which Git does not do),
24 # it's almost no extra work for new refs, just in case. We also attempt
25 # to make all packs user and group writable so they can be touched later.
27 # Starting with Git v2.11.0 receive-pack packs/objects end up in a quarantine
28 # object directory that is just discarded immediately if pre-receive declines
29 # to accept the push. This is a good thing. However, it means that the
30 # incoming objects are NOT located in objects/... as GIT_OBJECT_DIRECTORY and
31 # GIT_QUARANTINE_PATH are both set to the quarantine objects directory and the
32 # original objects directory is appended to GIT_ALTERNATE_OBJECT_DIRECTORIES
33 # (but it will just be the absolute path to objects). The simple bottom line
34 # is that we should also try everything in the GIT_QUARANTINE_PATH directory if
35 # it's set.
# Forget a quarantine path that is set but does not name an existing directory
# so later tests only need to check for a non-empty value.
36 [ -z "$GIT_QUARANTINE_PATH" ] || [ -d "$GIT_QUARANTINE_PATH" ] || unset GIT_QUARANTINE_PATH
38 # We also record changes to a ref log. We do it here rather than in
39 # post-receive so that we can guarantee all potential changes are recorded in
40 # the log before they take place. It's possible that the update hook will
41 # ultimately deny one or more updates but waiting until post-receive could
42 # result in updates being left out of the log.
# Shell glob fragments (used with find -name, not regular expressions):
# one hex octet (2 hex digits), 4 octets, and 20 octets (40 hex digits).
44 octet='[0-9a-f][0-9a-f]'
45 octet4="$octet$octet$octet$octet"
46 octet20="$octet4$octet4$octet4$octet4$octet4"
# _make_packs_ugw <dir>
# Add user and group write permission to every regular file located directly
# in <dir> (no recursion) whose name is pack-<40 hex digits>.pack and that
# does not already have both the user and group write bits set.
# Best effort only: a failing find/chmod is ignored (|| :) and the redirect
# attached to the function definition discards stderr on every call, so a
# missing directory is silently tolerated.
47 _make_packs_ugw() {
48 find "$1" -maxdepth 1 -type f ! -perm -ug+w \
49 -name "pack-$octet20.pack" -exec chmod ug+w '{}' + || :
50 } 2>/dev/null
# Always handle the repository's own pack directory ...
51 _make_packs_ugw objects/pack
# ... and the quarantine pack directory too when a quarantine path is set.
52 [ -z "$GIT_QUARANTINE_PATH" ] || _make_packs_ugw "$GIT_QUARANTINE_PATH/pack"
54 # Trigger a mini-gc if there are at least 20 packs present.
55 # Our current pack that contains this push's data will have a .keep while
56 # this hook is running so we do not use --exclude-keep here under the
57 # assumption that by the time a mini-gc starts the .keep will have been
58 # removed. If not, that's okay too, it just means the current pack will
59 # not take part in the mini-gc and will have to wait for the next one.
60 # The mini-gc code contains the logic to sort out small packs vs. non-small
61 # packs and which should be combined in what order so we do not need to
62 # do any more complicated testing here. We do include any "quarantined" packs
63 # in the count so that any needed gc is not delayed.
# Nothing to do when a gc has already been requested (.needsgc exists).
64 if ! [ -e .needsgc ]; then
65 packs=
# Counting is best effort: list_packs failures and its stderr are discarded
# and an empty result is treated as a count of 0.
# NOTE(review): --count presumably prints the number of packs and
# --exclude-no-idx presumably skips packs lacking an .idx -- confirm in list_packs.
66 { packs="$(list_packs --quiet --count --exclude-no-idx objects/pack || :)" || :; } 2>/dev/null
67 [ -n "$packs" ] || packs=0
# Add the packs sitting in the quarantine pack directory, if there is one.
68 if [ -n "$GIT_QUARANTINE_PATH" ] && [ -d "$GIT_QUARANTINE_PATH/pack" ]; then
69 { packsq="$(list_packs --quiet --count --exclude-no-idx "$GIT_QUARANTINE_PATH/pack" || :)" || :; } 2>/dev/null
70 [ -n "$packsq" ] || packsq=0
71 packs="$(( $packs + $packsq ))"
# At 20 or more packs, create the empty .needsgc marker file.
73 if [ "$packs" -ge 20 ]; then
74 >.needsgc
78 # Make sure we have a reflogs directory and abort the update if we cannot
79 # create one. Normally project creation will make sure this directory exists.
# Try to create the directory quietly; a mkdir failure is swallowed here on
# purpose (|| :) so that the decision is made by the bare test that follows.
80 [ -d reflogs ] || mkdir -p reflogs >/dev/null 2>&1 || :
# With "set -e" in effect this bare test exits the hook with a failure status
# when the reflogs directory still does not exist.
81 [ -d reflogs ]
83 # Multiple push operations could be occurring simultaneously so we need to
84 # guarantee they do not step on one another and we do this by generating a
85 # unique log file name. We use a seconds timestamp and the current process
86 # id and we guarantee that this process is kept alive for the entire second
87 # of the timestamp thereby guaranteeing that we are the only possible process
88 # that could use that pid during that particular second (ignoring leap seconds).
89 # To do this we need to sleep until the second turns over, grab the timestamp
90 # and then sleep until the second turns over again. This will introduce a
91 # guaranteed 2 second delay into every push. This should not generally be
92 # noticeable and does provide a limited throttle on excessive push DOS attacks.
93 # We always use UTC for the timestamp so that chroot and non-chroot match up.
94 # Log entries are the lines sent to the pre-receive hook with hhmmss prepended.
# Sleep, take a UTC timestamp of the form YYYYmmdd_HHMMSS, then sleep again so
# this process stays alive across the whole second named by the timestamp.
95 sleep 1
96 lognamets="$(TZ=UTC date '+%Y%m%d_%H%M%S')"
97 sleep 1
# Strip everything through the underscore, leaving only the HHMMSS part.
98 loghhmmss="${lognamets##*_}"
100 # We write to a temp ref log and then move it into place so that the reflogs
101 # collector can assume that log files with their final name are immutable
# Both names end in ".<pid of this shell>"; the temporary one is distinguished
# by its "tmp_" prefix.
102 logname="reflogs/$lognamets.$$"
103 lognametmp="reflogs/tmp_$lognamets.$$"
# _log_and_freshen_refs
# Log every incoming ref update and freshen the loose objects it names.
#
# Input:   pre-receive lines "<old> <new> <ref>" on stdin.
# Output:  each line is written to file descriptor 3 with $loghhmmss
#          prepended (the caller must have fd 3 open on the log file).
# Effects: for the old id (freshen mod time on recently unref'd loose
#          objects) and the new id (prevent imminent pruning of a ref being
#          resurrected), unless the id is the all-zeros id, the matching
#          loose object file under objects/ and -- when GIT_QUARANTINE_PATH
#          is set -- under the quarantine directory is made user/group
#          writable and has its mod time updated.  touch -c never creates
#          files, and chmod/touch failures are ignored on purpose because the
#          object may be packed or simply not present as a loose file.
#
# The quarantine directory is named by GIT_QUARANTINE_PATH, the same variable
# the rest of this hook tests; a different, never-set variable name must not
# be used here or quarantined objects are silently skipped.
_log_and_freshen_refs() {
	_zero_oid=0000000000000000000000000000000000000000
	while read -r old new ref; do
		echo "$loghhmmss $old $new $ref" >&3
		# Collect the candidate paths in this function's own positional
		# parameters so no eval or hand-rolled quoting is needed and paths
		# containing quotes or spaces are passed through intact.
		set --
		for oid in "$old" "$new"; do
			[ "$oid" != "$_zero_oid" ] || continue
			# Loose objects live at <2 hex digit shard>/<remaining digits>.
			fn="${oid#??}"
			shard="${oid%"$fn"}"
			set -- "$@" "objects/$shard/$fn"
			[ -z "$GIT_QUARANTINE_PATH" ] ||
				set -- "$@" "$GIT_QUARANTINE_PATH/$shard/$fn"
		done
		[ $# -gt 0 ] || continue
		chmod ug+w "$@" 2>/dev/null || :
		touch -c "$@" 2>/dev/null || :
	done
}
# Write to the temporary log and only then move it to its final name so a
# log file with a final name is never seen partially written.
_log_and_freshen_refs 3>"$lognametmp"
mv "$lognametmp" "$logname"
127 # While unlikely, it is conceivable that several ref updates have occurred that
128 # did not actually push any packs. In that case we could build up a large
129 # number of log files so request a mini gc if there are 50 or more of them now.
130 if ! [ -e .needsgc ]; then
131 logfiles=
132 { logfiles="$(($(find reflogs -maxdepth 1 -type f -print | wc -l || :)+0))" || :; } 2>/dev/null
133 if [ -n "$logfiles" ] && [ "$logfiles" -ge 50 ]; then
134 >.needsgc