bang: throttle retries
[girocco.git] / jobd / update.sh
blob c69fac9e6527935a61a8e2058cf6bc5dd018957b
1 #!/bin/sh
# Girocco mirror-update job: refreshes one mirrored project repository.
# Helper functions (bang*, config_*, progress, check_interval, ...) come
# from shlib.sh, sourced below.
# NOTE(review): this is a gitweb blob extraction; interior lines are elided
# (the embedded line numbers jump), so some closing tokens are not visible.
3 . @basedir@/shlib.sh
5 set -e
# Require exactly one argument: the project name.
7 if [ $# -ne 1 ]; then
8 echo "Usage: update.sh projname" >&2
9 exit 1
# (closing fi elided from this view)
12 # date -R is linux-only, POSIX equivalent is '+%a, %d %b %Y %T %z'
13 datefmt='+%a, %d %b %Y %T %z'
15 # freshen_loose_objects full-sha ...
16 # if "$n" is a loose object, set its modification time to now
17 # otherwise silently do nothing with no error. To facilitate conversion
18 # of mirror projects to push projects we also add group write permission.
19 freshen_loose_objects() {
20 _list=
# Build "objects/xx/rest" paths: the first two hex digits of each sha
# form the loose-object fan-out directory, the remaining 38 the filename.
21 for _sha; do
22 _fn="${_sha#??}"
23 _shard="${_sha%$_fn}"
24 _list="$_list objects/$_shard/$_fn"
25 done
# $_list is deliberately unquoted: it is a space-separated list of paths
# (shas contain no whitespace). Both commands are best-effort (|| :).
26 if [ -n "$_list" ]; then
27 chmod ug+w $_list 2>/dev/null || :
# touch -c: do not create files that are not already loose objects.
28 touch -c $_list 2>/dev/null || :
# (closing fi and } elided from this view)
32 # darcs fast-export | git fast-import with error handling
# Runs in a subshell (note "(" body) so locale/fd changes do not leak.
# The here-doc below is expanded and its pipeline's per-stage exit codes
# are echoed to fd 3 and read back into _err1/_err2, so a failure in
# either side of the pipe is detected (plain sh has no pipefail).
# NOTE(review): several here-doc interior lines are elided from this view;
# no comments are added inside it to avoid altering its expanded content.
33 git_darcs_fetch() (
34 set_utf8_locale
35 _err1=
36 _err2=
37 exec 3>&1
38 { read -r _err1 || :; read -r _err2 || :; } <<-EOT
40 exec 4>&3 3>&1 1>&4 4>&-
42 _e1=0
43 "$cfg_basedir"/bin/darcs-fast-export \
44 --export-marks="$(pwd)/dfe-marks" \
45 --import-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
46 echo $_e1 >&3
47 } | \
49 _e2=0
50 git fast-import \
51 --export-marks="$(pwd)/gfi-marks" \
52 --export-pack-edges="$(pwd)/gfi-packs" \
53 --import-marks="$(pwd)/gfi-marks" \
54 --force 3>&- || _e2=$?
55 echo $_e2 >&3
58 EOT
59 exec 3>&-
# Succeed only when both the exporter and the importer exited 0.
60 [ "$_err1" = 0 -a "$_err2" = 0 ]
61 return $?
64 # bzr fast-export | git fast-import with error handling
# Same structure as git_darcs_fetch: a subshell whose here-doc pipeline
# reports each stage's exit status on fd 3 so both sides are checked.
# BZR_LOG=/dev/null silences bzr's log file creation.
# NOTE(review): here-doc interior lines are elided from this view;
# no comments are added inside it to avoid altering its expanded content.
65 git_bzr_fetch() (
66 set_utf8_locale
67 BZR_LOG=/dev/null
68 export BZR_LOG
69 _err1=
70 _err2=
71 exec 3>&1
72 { read -r _err1 || :; read -r _err2 || :; } <<-EOT
74 exec 4>&3 3>&1 1>&4 4>&-
76 _e1=0
77 bzr fast-export --plain \
78 --export-marks="$(pwd)/dfe-marks" \
79 --import-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
80 echo $_e1 >&3
81 } | \
83 _e2=0
84 git fast-import \
85 --export-marks="$(pwd)/gfi-marks" \
86 --export-pack-edges="$(pwd)/gfi-packs" \
87 --import-marks="$(pwd)/gfi-marks" \
88 --force 3>&- || _e2=$?
89 echo $_e2 >&3
92 EOT
93 exec 3>&-
# Succeed only when both exporter and importer exited 0.
94 [ "$_err1" = 0 -a "$_err2" = 0 ]
95 return $?
# --- main script body begins here ---
# Bail out early (successfully) when mirroring is globally disabled.
98 [ -n "$cfg_mirror" ] || { echo "Mirroring is disabled" >&2; exit 0; }
100 umask 002
# "Hooks" permission control needs world-writable objects (umask 000).
101 [ "$cfg_permission_control" != "Hooks" ] || umask 000
# Accept the project name with or without a trailing ".git".
103 proj="${1%.git}"
104 cd "$cfg_reporoot/$proj.git"
# On any exit: report failure location if non-zero, always remove bang log.
106 trap 'if [ $? != 0 ]; then echo "update failed dir: $PWD" >&2; fi; rm -f "$bang_log"' EXIT
107 trap 'exit 130' INT
108 trap 'exit 143' TERM
# Skip this run entirely if the last refresh was too recent (throttle).
110 if check_interval lastrefresh $cfg_min_mirror_interval; then
111 progress "= [$proj] update skip (last at $(config_get lastrefresh))"
112 exit 0
# (closing fi elided from this view)
114 progress "+ [$proj] update (`date`)"
116 # Any pre-existing FETCH_HEAD from a previous fetch failed or not is garbage
117 rm -f FETCH_HEAD
119 # A previous failed update attempt can leave a huge tmp_pack_XXXXXX file behind.
120 # Since no pushes are allowed to mirrors, we know that any such files that exist
121 # at this point in time are garbage and can be safely deleted, we do not even
122 # need to check how old they are. A tmp_idx_XXXXXX file is also created during
123 # the later stages of the fetch process, so we kill any of those as well.
124 find objects/pack -maxdepth 1 -type f -name "tmp_pack_?*" -print0 | xargs -0 rm -f
125 find objects/pack -maxdepth 1 -type f -name "tmp_idx_?*" -print0 | xargs -0 rm -f
# bang_setup/bang come from shlib.sh: they log command failures and
# arrange for failure-notification mail; bang_trap is the failure hook.
127 bang_setup
128 bang_action="update"
129 bang_trap() {
130 if [ -n "$1" ]; then
131 # Throttle retries
132 # Since gitweb shows the .last_refresh date, it's safe to update
133 # gitweb.lastrefresh to throttle the updates w/o corrupting the
134 # last refresh date display on the gitweb summary page
135 # It's therefore important that we do NOT touch .last_refresh here
136 config_set lastrefresh "$(date "$datefmt")"
# (closing fi and } elided from this view)
140 bang echo "Project: $proj"
141 bang echo ""
142 mail="$(config_get owner || :)"
143 url="$(config_get baseurl || :)"
# Reject URLs containing whitespace or empty URLs (second pattern
# presumably matches a tab — the distinction is lost in this extraction).
144 case "$url" in *" "*|*" "*|"")
145 bang_eval 'echo "Bad mirror URL (\"$url\")"; ! :'
146 exit 1
147 esac
148 bang echo "Mirroring from URL \"$url\""
149 bang echo ""
# Build the status-mail recipient list: owner (unless opted out) plus
# the admin CC when configured.
150 statusok="$(git config --bool gitweb.statusupdates 2>/dev/null || echo true)"
151 mailaddrs=''
152 [ "$statusok" = "false" -o -z "$mail" ] || mailaddrs="$mail"
153 [ -z "$cfg_admincc" -o "$cfg_admincc" = "0" -o -z "$cfg_admin" ] || \
154 if [ -z "$mailaddrs" ]; then mailaddrs="$cfg_admin"; else mailaddrs="$mailaddrs,$cfg_admin"; fi
# Snapshot all refs before the fetch so changes can be diffed afterwards.
156 bang_eval "git for-each-ref --format '%(refname) %(objectname)' >.refs-temp"
157 bang_eval "LC_ALL=C sort -b -k1,1 <.refs-temp >.refs-before"
158 ! [ -e .delaygc ] || >.allowgc || :
159 svnpackcreated=
# Dispatch on the mirror source URL scheme.
161 case "$url" in
162 svn://* | svn+http://* | svn+https://* | svn+file://* | svn+ssh://*)
163 [ -n "$cfg_mirror_svn" ] || { echo "Mirroring svn is disabled" >&2; exit 0; }
164 # Use an 'anonsvn' username as is commonly used for anonymous svn
165 # Use an 'anonsvn' password as is commonly used for anonymous svn
166 GIT_ASKPASS_PASSWORD=anonsvn
167 export GIT_ASKPASS_PASSWORD
168 # Update the git svn url to match baseurl but be cognizant of any
169 # needed prefix changes. See the comments in taskd/clone.sh about
170 # why we need to put up with a prefix in the first place.
171 case "$url" in svn+ssh://*) svnurl="$url";; *) svnurl="${url#svn+}";; esac
172 svnurl="${svnurl%/}"
173 svnurlold="$(config_get svnurl || :)"
174 if [ "$svnurl" != "$svnurlold" ]; then
175 # We better already have an svn-remote.svn.fetch setting
176 bang test -n "$(git config --get-all svn-remote.svn.fetch || :)"
177 # the only way to truly know what the proper prefix is
178 # is to attempt a fresh git-svn init -s on the new url
179 rm -rf svn-new-url || :
180 # We require svn info to succeed on the URL otherwise it's
181 # simply not a valid URL and without using -s on the init it
182 # will not otherwise be tested until the fetch
183 bang eval 'svn --non-interactive --username anonsvn --password anonsvn info "$svnurl" > /dev/null'
184 bang mkdir svn-new-url
185 GIT_DIR=svn-new-url bang git init --bare --quiet
186 # We initially use -s for the init which will possibly shorten
187 # the URL. However, the shortening can fail if a password is
188 # not required for the longer version but is for the shorter,
189 # so try again without -s if the -s version fails.
190 cmdstr='git svn init --username=anonsvn --prefix "" -s "$svnurl" < /dev/null > /dev/null 2>&1 || '
191 cmdstr="$cmdstr"'git svn init --username=anonsvn --prefix "" "$svnurl" < /dev/null > /dev/null 2>&1'
192 GIT_DIR=svn-new-url bang eval "$cmdstr"
# Extract the prefix git-svn chose in the throwaway repo: the fetch
# refspec's source side up to (but excluding) its last path component.
193 gitsvnurl="$(GIT_DIR=svn-new-url git config --get svn-remote.svn.url || :)"
194 gitsvnfetch="$(GIT_DIR=svn-new-url git config --get svn-remote.svn.fetch || :)"
195 gitsvnprefixnew="${gitsvnfetch%%:*}"
196 gitsvnsuffixnew="${gitsvnprefixnew##*/}"
197 gitsvnprefixnew="${gitsvnprefixnew%$gitsvnsuffixnew}"
198 rm -rf svn-new-url || :
199 # Using GIT_DIR= with bang leaves it set to svn-new-url, so reset it to .
200 GIT_DIR=.
201 if [ "$gitsvnurl" != "$(git config --get svn-remote.svn.url || :)" ]; then
202 # The url has been changed.
203 # We must update the url and replace the prefix on all config items
204 gitsvnfetch="$(git config --get-all svn-remote.svn.fetch | head -1 || :)"
205 gitsvnprefixold="${gitsvnfetch%%:*}"
206 gitsvnsuffixold="${gitsvnprefixold##*/}"
207 gitsvnprefixold="${gitsvnprefixold%$gitsvnsuffixold}"
# Rewrite config under a temporary svn-remote.svnnew section, then
# atomically swap it in via --remove-section/--rename-section below.
208 git config --remove-section 'svn-remote.svnnew' 2>/dev/null || :
209 git config 'svn-remote.svnnew.url' "$gitsvnurl"
210 { git config --get-regexp '^svn-remote\.svn\.' || :; } | \
211 { while read sname sval; do
212 case "$sname" in
213 svn-remote.svn.fetch|svn-remote.svn.branches|svn-remote.svn.tags)
214 sname="${sname#svn-remote.svn.}"
215 sval="${sval#$gitsvnprefixold}"
216 bang git config --add "svn-remote.svnnew.$sname" "${gitsvnprefixnew}$sval"
217 esac
218 done; }
219 bang git config -f svn/.metadata svn-remote.svn.reposRoot "$gitsvnurl"
220 bang git config --remove-section svn-remote.svn
221 bang git config --rename-section svn-remote.svnnew svn-remote.svn
# (closing fi lines elided from this view)
223 bang config_set svnurl "$svnurl"
225 # remove any stale *.lock files greater than 1 hour old in case
226 # git-svn was killed on the last update because it took too long
227 find svn -type f -name '*.lock' -mmin +60 -print0 2>/dev/null | xargs -0 rm -f
228 # remember the starting time so we can easily combine fetched loose objects
229 # we sleep for 1 second after creating .svnpack to make sure all objects are newer
230 if ! [ -e .svnpack ]; then
231 svnpackcreated=1
232 rm -f .svnpack
233 >.svnpack
234 sleep 1
# (closing fi elided from this view)
236 GIT_DIR=. bang git svn fetch --log-window-size=$var_log_window_size --username=anonsvn --quiet < /dev/null
237 # git svn does not preserve group permissions in the svn subdirectory
238 chmod -R ug+rw,o+r svn
239 # git svn also leaves behind ref turds that end with @nnn
240 # We get rid of them now
# Patterns match refs ending in "@<1-8 digit number>" (no leading zero).
241 git for-each-ref --format='%(objectname) %(refname)' | \
242 { while read sha1 ref; do
243 case "$ref" in
244 ?*@[1-9]|?*@[1-9][0-9]|?*@[1-9][0-9][0-9]|?*@[1-9][0-9][0-9][0-9]|\
245 ?*@[1-9][0-9][0-9][0-9][0-9]|?*@[1-9][0-9][0-9][0-9][0-9][0-9]|\
246 ?*@[1-9][0-9][0-9][0-9][0-9][0-9][0-9]|\
247 ?*@[1-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9])
248 git update-ref -d "$ref"
249 esac
250 done; }
251 unset GIT_ASKPASS_PASSWORD
# darcs source: mirror via the darcs-fast-export | git fast-import helper.
253 darcs://*)
254 [ -n "$cfg_mirror_darcs" ] || { echo "Mirroring darcs is disabled" >&2; exit 0; }
# darcs:// is fetched over plain http.
255 httpurl="http://${url#darcs://}"
256 # remove any stale lock files greater than 1 hour old in case
257 # darcs_fast_export was killed on the last update because it took too long
258 find *.darcs -maxdepth 2 -type f -name 'lock' -mmin +60 -print0 2>/dev/null | xargs -0 rm -f
259 bang git_darcs_fetch "$httpurl"
# (";;" separator elided from this view)
261 bzr://*)
262 [ -n "$cfg_mirror_bzr" ] || { echo "Mirroring bzr is disabled" >&2; exit 0; }
263 bzrurl="${url#bzr://}"
264 bang git_bzr_fetch "$bzrurl"
# (";;" separator elided from this view)
266 hg+http://* | hg+https://* | hg+file://* | hg+ssh://*)
267 [ -n "$cfg_mirror_hg" ] || { echo "Mirroring hg is disabled" >&2; exit 0; }
268 # We just remove hg+ here, so hg+http://... becomes http://...
269 hgurl="${url#hg+}"
270 # Fetch any new updates
271 bang hg -R "$(pwd)/repo.hg" pull
272 # Do the fast-export | fast-import
# git_hg_fetch is presumably provided by shlib.sh — not visible here.
273 bang git_hg_fetch
# Default case: a native git remote; fetch with "git remote update".
# Keep remote.origin.url in sync with the configured baseurl.
276 [ "$url" = "$(git config --get remote.origin.url || :)" ] || bang config_set_raw remote.origin.url "$url"
# Honor an explicit fetch.prune=false; otherwise prune deleted refs.
277 pruneopt=--prune
278 [ "$(git config --bool fetch.prune 2>/dev/null || :)" != "false" ] || pruneopt=
279 git_add_config 'fetch.unpackLimit=1'
280 # Note the git config documentation is wrong
281 # transfer.unpackLimit, if set, overrides fetch.unpackLimit
282 git_add_config 'transfer.unpackLimit=1'
# "Clean" mirrors track only heads/tags/notes; adjust the refspecs when
# the girocco.cleanmirror setting changed since the last run.
283 if ! is_gfi_mirror_url "$url"; then
284 lastwasclean=
285 [ "$(git config --bool girocco.lastupdateclean 2>/dev/null || :)" != "true" ] || lastwasclean=1
286 nextisclean=
287 [ "$(git config --bool girocco.cleanmirror 2>/dev/null || :)" != "true" ] || nextisclean=1
288 if [ "$nextisclean" != "$lastwasclean" ]; then
289 if [ -n "$nextisclean" ]; then
290 git config --replace-all remote.origin.fetch "+refs/heads/*:refs/heads/*"
291 git config --add remote.origin.fetch "+refs/tags/*:refs/tags/*"
292 git config --add remote.origin.fetch "+refs/notes/*:refs/notes/*"
293 else
294 git config --replace-all remote.origin.fetch "+refs/*:refs/*"
# (closing fi lines elided from this view)
298 # remember the starting time so we can easily detect new packs for fast-import mirrors
299 # we sleep for 1 second after creating .gfipack to make sure all packs are newer
300 if is_gfi_mirror_url "$url" && [ ! -e .gfipack ]; then
301 rm -f .gfipack
302 >.gfipack
303 sleep 1
# (closing fi elided from this view)
305 GIT_SSL_NO_VERIFY=1 bang git remote update $pruneopt
306 if ! is_gfi_mirror_url "$url" && [ "$nextisclean" != "$lastwasclean" ]; then
307 if [ -n "$nextisclean" ]; then
308 # We must manually purge the unclean refs now as even prune won't do it
309 git for-each-ref --format='%(refname)' |
310 while read -r aref; do
311 case "$aref" in refs/heads/*|refs/tags/*|refs/notes/*) :;; *)
312 git update-ref -d "$aref"
313 esac
314 done
# (closing fi elided from this view)
316 git config --bool girocco.lastupdateclean ${nextisclean:-0}
# (closing fi elided from this view)
# Record any packs newer than the .gfipack marker for later gc handling.
318 if [ -e .gfipack ] && is_gfi_mirror_url "$url"; then
319 find objects/pack -type f -newer .gfipack -name "pack-$octet20.pack" -print >>gfi-packs
320 rm -f .gfipack
# (closing fi and ";;" elided from this view)
323 esac
325 # The objects subdirectories permissions must be updated now.
326 # In the case of a dumb http clone, the permissions will not be correct
327 # (missing group write) despite the core.sharedrepository=1 setting!
328 # The objects themselves seem to have the correct permissions.
329 # This problem appears to have been fixed in the most recent git versions.
330 perms=g+w
331 [ "$cfg_permission_control" != "Hooks" ] || perms=go+w
# $(find ...) expansion is deliberately unquoted: a list of directory paths.
332 chmod $perms $(find objects -maxdepth 1 -type d) 2>/dev/null || :
# Refresh info/refs etc. for dumb-protocol clients.
334 bang git update-server-info
336 # We maintain the last refresh date in two places deliberately
337 # so that it's available as part of the config data and also
338 # as a standalone file timestamp that can be accessed without git.
339 bang config_set lastrefresh "$(date "$datefmt")"
340 { >.last_refresh; } 2>/dev/null || :
342 # Check to see if any refs changed
343 bang_eval "git for-each-ref --format '%(refname) %(objectname)' >.refs-temp"
344 bang_eval "LC_ALL=C sort -b -k1,1 <.refs-temp >.refs-after"
345 refschanged=
346 cmp -s .refs-before .refs-after || refschanged=1
348 # There's no way to get git svn to fetch packs, so we always need to run
349 # a mini-gc afterwards if svn actually fetched anything, but that's okay
350 # because it will be very quick
351 if [ -z "$refschanged" -a -n "$svnpackcreated" ]; then
352 # we created the .svnpack but didn't actually fetch anything
353 # so remove it to avoid forcing a mini-gc if not necessary
354 rm -f .svnpack
# (closing fi elided from this view)
356 if [ -e .svnpack ] && ! [ -e .needsgc ]; then
357 >.needsgc
# (closing fi elided from this view)
360 # Force a mini-gc if $Girocco::Config::delay_gfi_redelta is false and there's
361 # at least one gfi pack present now
362 if [ -z "$cfg_delay_gfi_redelta" ] && ! [ -e .needsgc ] && \
363 [ -f gfi-packs -a -s gfi-packs ] && is_gfi_mirror_url "$url"; then
364 >.needsgc
# (closing fi elided from this view)
367 # Activate a mini-gc if there are at least 20 packs present now
368 if ! [ -e .needsgc ]; then
369 packs=
370 { packs="$(list_packs --quiet --count --exclude-no-idx objects/pack || :)" || :; } 2>/dev/null
371 if [ -n "$packs" ] && [ "$packs" -ge 20 ]; then
372 >.needsgc
376 # Look at which refs changed and trigger ref-change for these
377 sockpath="$cfg_chroot/etc/taskd.socket"
378 if [ -n "$refschanged" ]; then
379 bang config_set lastreceive "$(date '+%a, %d %b %Y %T %z')"
380 # We freshen the mod time to now on any old or new ref that is a loose object
381 # For old refs we do it so we will be able to keep them around for 1 day
382 # For new refs we do it in case we are about to run gc and the new ref
383 # actually points to an oldish loose object that had been unreachable
384 # We probably do not need to do it for new refs as Git tries to do that,
385 # but since we're already doing it for old refs (which Git does not do),
386 # it's almost no extra work for new refs, just in case.
# The grouped output below becomes the taskd ref-change message.
# (opening "{" of the group elided from this view)
388 echo "ref-changes %$proj% $proj"
# Refs present both before and after: drop unchanged ones (sed deletes
# lines whose two sha fields are identical), emit "old new ref".
389 LC_ALL=C join .refs-before .refs-after |
390 LC_ALL=C sed -e '/^[^ ][^ ]* \([^ ][^ ]*\) \1$/d' |
391 while read ref old new; do
392 freshen_loose_objects "$old" "$new"
393 echo "$old $new $ref"
394 done
# Refs only in .refs-before: deleted — new sha is all zeros.
395 LC_ALL=C join -v 1 .refs-before .refs-after |
396 while read ref old; do
397 freshen_loose_objects "$old"
398 echo "$old 0000000000000000000000000000000000000000 $ref"
399 done
# Refs only in .refs-after: created — old sha is all zeros.
400 LC_ALL=C join -v 2 .refs-before .refs-after |
401 while read ref new; do
402 freshen_loose_objects "$new"
403 echo "0000000000000000000000000000000000000000 $new $ref"
404 done
405 git for-each-ref --format='%(objectname) %(objectname) %(refname)' refs/heads
406 } >.refs-temp
# Deliver the message to taskd over its unix socket, best-effort.
407 if [ -S "$sockpath" ]; then
408 nc_openbsd -w 15 -U "$sockpath" <.refs-temp || :
# (closing fi elided from this view)
410 bang config_set lastchange "$(date '+%a, %d %b %Y %T %z')"
411 bang_eval "git for-each-ref --sort=-committerdate --format='%(committerdate:iso8601)' \
412 --count=1 refs/heads > info/lastactivity"
413 ! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :
414 rm -f .delaygc .allowgc
# (closing fi elided from this view)
417 # If the repository does not yet have a valid HEAD symref try to set one
418 # If an empty repository was cloned and then later becomes unempty you just
419 # lose out on the fancy "symref=HEAD:" logic and get this version instead
420 check_and_set_head || :
422 rm -f .refs-before .refs-after .refs-temp FETCH_HEAD
# If a previous run failed and a failure mail went out, send a recovery
# notice now that the update succeeded.
424 if is_banged; then
425 [ -z "$mailaddrs" ] || ! was_banged_message_sent ||
# (opening "{" of the mail-body group elided from this view)
427 echo "$proj update succeeded - failure recovery"
428 echo "this status message may be disabled on the project admin page"
429 } | mail -s "[$cfg_name] $proj update succeeded" "$mailaddrs" || :
430 bang_reset
# (closing fi elided from this view)
433 progress "- [$proj] update (`date`)"