clone.sh: be more robust restarting a failed clone
[girocco.git] / taskd / clone.sh
blob40336071069b5f81da142c82004ab5e9851aa949
#!/bin/sh
#
# Invoked from taskd/taskd.pl

# Shared shell library; @basedir@ is a placeholder that is expected to be
# substituted before this script runs (TODO confirm against the installer).
. @basedir@/shlib.sh

# Stop at the first unhandled command failure
set -e

# Default creation mask is 002; with "Hooks" permission control no
# permission bits are masked at all.
case "$cfg_permission_control" in
  Hooks) umask 000 ;;
  *) umask 002 ;;
esac
# darcs fast-export | git fast-import with error handling
#
# Arguments: $1 - URL of the darcs repository to export
# Outputs:   whatever git fast-import prints goes to the caller's stdout
# Returns:   0 only when BOTH the exporter and the importer exited with 0
#
# Export marks go to dfe-marks, import marks to gfi-marks and pack edges to
# gfi-packs, all in the current directory.  The function body is a subshell
# so set_utf8_locale and the descriptor juggling never leak into the caller.
git_darcs_fetch() (
  set_utf8_locale
  _err1=
  _err2=
  # Keep the caller's stdout reachable as fd 3 inside the $(...) below.
  exec 3>&1
  # Inside the substitution fd 1 and fd 3 are swapped, so fd 1 is the real
  # stdout again and fd 3 is the capture.  Each pipeline stage writes its own
  # exit status to fd 3; the here-document then feeds the two captured
  # status lines to read.  Both tools get fd 3 closed (3>&-).
  { read -r _err1 || :; read -r _err2 || :; } <<EOT
$(
  exec 4>&3 3>&1 1>&4 4>&-
  {
    _e1=0
    "$cfg_basedir"/bin/darcs-fast-export \
      --export-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
    echo "$_e1" >&3
  } |
  {
    _e2=0
    git fast-import \
      --export-marks="$(pwd)/gfi-marks" \
      --export-pack-edges="$(pwd)/gfi-packs" \
      --force 3>&- || _e2=$?
    echo "$_e2" >&3
  }
)
EOT
  exec 3>&-
  # Two separate tests instead of the obsolescent "-a" form; the result is
  # the function's exit status.
  [ "$_err1" = 0 ] && [ "$_err2" = 0 ]
)
# bzr fast-export | git fast-import with error handling
#
# Arguments: $1 - bzr branch location to export
# Outputs:   whatever git fast-import prints goes to the caller's stdout
# Returns:   0 only when BOTH the exporter and the importer exited with 0
#
# Export marks go to bfe-marks, import marks to gfi-marks and pack edges to
# gfi-packs, all in the current directory.  The function body is a subshell
# so BZR_LOG, set_utf8_locale and the descriptor juggling stay local.
git_bzr_fetch() (
  set_utf8_locale
  BZR_LOG=/dev/null
  export BZR_LOG
  _err1=
  _err2=
  # Keep the caller's stdout reachable as fd 3 inside the $(...) below.
  exec 3>&1
  # Inside the substitution fd 1 and fd 3 are swapped, so fd 1 is the real
  # stdout again and fd 3 is the capture.  Each pipeline stage writes its own
  # exit status to fd 3; the here-document then feeds the two captured
  # status lines to read.  Both tools get fd 3 closed (3>&-).
  { read -r _err1 || :; read -r _err2 || :; } <<EOT
$(
  exec 4>&3 3>&1 1>&4 4>&-
  {
    _e1=0
    bzr fast-export --plain \
      --export-marks="$(pwd)/bfe-marks" "$1" 3>&- || _e1=$?
    echo "$_e1" >&3
  } |
  {
    _e2=0
    git fast-import \
      --export-marks="$(pwd)/gfi-marks" \
      --export-pack-edges="$(pwd)/gfi-packs" \
      --force 3>&- || _e2=$?
    echo "$_e2" >&3
  }
)
EOT
  exec 3>&-
  # Two separate tests instead of the obsolescent "-a" form; the result is
  # the function's exit status.
  [ "$_err1" = 0 ] && [ "$_err2" = 0 ]
)
# Mail a "clone failed" notice, with an excerpt of .clonelog, to the project
# owner and, when $cfg_admincc is set to something other than "0", to
# $cfg_admin as well.
#
# Globals read: cfg_admincc cfg_admin cfg_name cfg_webadmurl proj url
# Files read:   .clonelog in the current directory
# Returns:      always 0; a failing mail command is deliberately ignored
#
# The EXIT trap is cleared first and stdout/stderr are pointed at /dev/null
# for the remainder of the process.
send_clone_failed() {
  trap "" EXIT
  # We must now close the .clonelog file that is open on stdout and stderr
  exec >/dev/null 2>&1
  failaddrs="$(config_get owner || :)"
  if [ -n "$cfg_admincc" ] && [ "$cfg_admincc" != "0" ] && [ -n "$cfg_admin" ]; then
    if [ -z "$failaddrs" ]; then
      failaddrs="$cfg_admin"
    else
      failaddrs="$failaddrs,$cfg_admin"
    fi
  fi
  # Nobody to notify
  [ -n "$failaddrs" ] || return 0
  {
    cat <<EOT
Condolences. The clone of project $proj just failed.

  * Source URL: $url
  * Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')

The project settings link may be used to adjust the settings
and restart the clone in order to try the clone again.
EOT
    if [ -f .clonelog ] && [ -r .clonelog ]; then
      echo ""
      echo "Log follows:"
      echo ""
      # The arithmetic expansion normalises whatever wc prints (possible
      # leading blanks, or nothing at all on failure) to a plain integer.
      loglines=$(( $(LC_ALL=C wc -l <.clonelog) + 0 ))
      # Eliding costs three lines of its own, so only logs longer than
      # 203 lines are shortened to the first and last 100 lines.
      if [ "$loglines" -le 203 ]; then
        cat .clonelog
      else
        head -n 100 .clonelog
        echo ""
        echo "[ ... elided $(( loglines - 200 )) middle lines ... ]"
        echo ""
        tail -n 100 .clonelog
      fi
    fi
  } | mail -s "[$cfg_name] $proj clone failed" "$failaddrs" || :
}
# removes all leftovers from a previous failed clone attempt
#
# Operates on the repository in the current directory: resets the remote
# configuration, deletes temporary import/export state, re-arms the
# post-clone gc, deletes every ref and points HEAD back at refs/heads/master.
cleanup_failed_clone() {

  # Remove any left-over svn-remote.svn or remote.origin config
  git config --remove-section svn-remote.svn 2>/dev/null || :
  git config --remove-section remote.origin 2>/dev/null || :

  # If there is a remote-template.origin section, pre-seed the
  # remote.origin section with its contents
  # (read -r so that backslashes in a value are copied unchanged)
  git config --get-regexp '^remote-template\.origin\..' |
  while read -r name value; do
    if [ -n "$name" ] && [ -n "$value" ]; then
      git config "remote${name#remote-template}" "$value"
    fi
  done

  # Any pre-existing FETCH_HEAD from a previous clone failed or not is
  # now garbage to be removed
  rm -f FETCH_HEAD

  # Remove any left-over svn dir from a previous failed attempt
  rm -rf svn

  # Remove any left-over .darcs dirs from a previous failed attempt
  rm -rf *.darcs

  # Remove any left-over repo.hg dir from a previous failed attempt
  rm -rf repo.hg

  # Remove any left-over import/export/temp files from a previous failed attempt
  rm -f bfe-marks dfe-marks hg2git-heads hg2git-mapping hg2git-marks* hg2git-state \
    gfi-marks gfi-packs .pkts-temp .refs-temp

  # We want a gc right after the clone, so re-enable that just in case.
  # There's a potential race where we could add it and gc.sh could remove
  # it, but we'll reunset lastgc just before we remove .delaygc at the end.
  [ -e .delaygc ] || >.delaygc
  git config --unset gitweb.lastgc 2>/dev/null || :

  # Remove all pre-existing refs
  # (--shell makes for-each-ref quote each refname for the eval)
  eval "$(git for-each-ref --shell --format='git update-ref -d %(refname)')"

  # The initial state before a clone starts has HEAD as a symbolic-ref to master
  git symbolic-ref HEAD refs/heads/master

  # We, perhaps, ought to remove any packs/loose objects now, but the next gc
  # will get rid of any extras.  Also, if we're recloning the same thing, any
  # preexisting packs/loose objects containing what we're recloning will only
  # speed up the reclone by avoiding some disk writes.  So we don't kill them.
}
# The project name is the first argument with any trailing ".git" removed
proj="${1%.git}"
cd "$cfg_reporoot/$proj.git"
bang_reset

# When a .delaygc marker exists also create .allowgc (best effort)
if [ -e .delaygc ]; then
  >.allowgc || :
fi

# From here on any exit prints the end marker, flags the clone as failed and
# sends the failure mail; the trap is cleared again once the clone succeeded.
trap "echo '@OVER@'; touch .clone_failed; send_clone_failed" EXIT
echo "Project: $proj"
echo ""
if [ -z "$cfg_mirror" ]; then
  echo "Mirroring is disabled" >&2
  exit 1
fi
url="$(config_get baseurl || :)"
# Reject an empty URL or one containing whitespace.
# NOTE(review): the second whitespace alternative is taken to be a tab and
# is spelled with $tab, which this script already uses further down -- confirm.
case "$url" in
  ""|*" "*|*"$tab"*)
    echo "Bad mirror URL (\"$url\")"
    exit 1
    ;;
esac

cleanup_failed_clone

# Record original mirror type for use by update.sh
mirror_type="$(get_url_mirror_type "$url")"
git config girocco.mirrortype "$mirror_type"

echo "Mirroring from URL \"$url\""
echo ""

# With "source" project owners the owner is taken from stat's %U of the URL
case "$cfg_project_owners" in
  source) config_set owner "$(stat -c %U "$url" 2>/dev/null)" ;;
esac

# Recipients of the completion mail: the owner plus $cfg_admin when set
mailaddrs="$(config_get owner || :)"
if [ -n "$cfg_admin" ]; then
  mailaddrs="${mailaddrs:+$mailaddrs,}$cfg_admin"
fi

# Initial mirror
echo "Initiating mirroring..."
headref=
showheadwarn=
warnempty=
# Run the initial fetch with the tool that matches the URL scheme:
# git-svn, darcs/bzr/hg fast-export piped into git fast-import, or plain git.
case "$url" in
  svn://* | svn+http://* | svn+https://* | svn+file://* | svn+ssh://*)
    [ -n "$cfg_mirror_svn" ] || { echo "Mirroring svn is disabled" >&2; exit 1; }
    # We just remove svn+ here, so svn+http://... becomes http://...
    # We also remove a trailing '/' to match what git-svn will do
    case "$url" in svn+ssh://*) svnurl="$url";; *) svnurl="${url#svn+}";; esac
    svnurl="${svnurl%/}"
    # Use an 'anonsvn' username as is commonly used for anonymous svn
    # Use an 'anonsvn' password as is commonly used for anonymous svn
    GIT_ASKPASS_PASSWORD=anonsvn
    export GIT_ASKPASS_PASSWORD
    # We require svn info to succeed on the URL otherwise it's
    # simply not a valid URL and without using -s on the init it
    # will not otherwise be tested until the fetch
    svn --non-interactive --username anonsvn --password anonsvn info "$svnurl" > /dev/null
    # We initially use -s for the init which will possibly shorten
    # the URL.  However, the shortening can fail if a password is
    # not required for the longer version but is for the shorter,
    # so try again without -s if the -s version fails.
    # We must use GIT_DIR=. here or ever so "helpful" git-svn will
    # create a .git subdirectory!
    GIT_DIR=. git svn init --username=anonsvn --prefix "" -s "$svnurl" < /dev/null || \
    GIT_DIR=. git svn init --username=anonsvn --prefix "" "$svnurl" < /dev/null
    # We need to remember this url so we can detect changes because
    # ever so "helpful" git-svn may shorten it!
    config_set svnurl "$svnurl"
    # At this point, since we asked for a standard layout (-s) git-svn
    # may have been "helpful" and adjusted our $svnurl to a prefix and
    # then glued the removed suffix onto the front of any svn-remote.svn.*
    # config items.  We could avoid this by not using the '-s' option
    # but then we might not get all the history.  If, for example, we
    # are cloning an http://svn.example.com/repos/public repository that
    # early in its history moved trunk => public/trunk we would miss that
    # earlier history without allowing the funky shorten+prefix behavior.
    # So we read back the svn-remote.svn.fetch configuration and compute
    # the prefix.  This way we are sure to get the correct prefix.
    gitsvnurl="$(git config --get svn-remote.svn.url || :)"
    # "tail -n 1" rather than the obsolete "tail -1" spelling
    gitsvnfetch="$(git config --get-all svn-remote.svn.fetch | tail -n 1 || :)"
    gitsvnprefix="${gitsvnfetch%%:*}"
    gitsvnsuffix="${gitsvnprefix##*/}"
    # The suffix is quoted so it is removed literally and never as a glob
    gitsvnprefix="${gitsvnprefix%"$gitsvnsuffix"}"
    # Ask git-svn to store everything in the normal non-remote
    # locations being careful to use the correct prefix
    git config --replace-all svn-remote.svn.fetch "${gitsvnprefix}trunk:refs/heads/master"
    git config --replace-all svn-remote.svn.branches "${gitsvnprefix}branches/*:refs/heads/*"
    git config --replace-all svn-remote.svn.tags "${gitsvnprefix}tags/*:refs/tags/*"
    # look for additional non-standard directories to fetch
    # check for standard layout at the same time
    foundstd=
    foundfile=
    { svn --non-interactive --username anonsvn --password anonsvn ls "$gitsvnurl/${gitsvnprefix}" 2>/dev/null || :; } |
    {
      while read -r file; do
        case $file in
          # skip the already-handled standard ones and any with a space or tab
          # NOTE(review): the tab alternative is spelled with $tab -- confirm
          *' '*|*"$tab"*) : ;;
          trunk/|branches/|tags/) foundstd=1 ;;
          # only fetch extra directories from the $svnurl root (not any files)
          *?/)
            git config --add svn-remote.svn.fetch \
              "${gitsvnprefix}${file%/}:refs/heads/${file%/}"
            ;;
          *?) foundfile=1 ;;
        esac
      done
      # if files found and no standard directories present use a simpler layout
      if [ -z "$foundstd" ] && [ -n "$foundfile" ]; then
        git config --unset svn-remote.svn.branches
        git config --unset svn-remote.svn.tags
        git config --replace-all svn-remote.svn.fetch ':refs/heads/master'
      fi
    }
    # Again, be careful to use GIT_DIR=. here or else new .git subdirectory!
    GIT_DIR=. git svn fetch --log-window-size="$var_log_window_size" --username=anonsvn --quiet < /dev/null
    # git svn does not preserve group permissions in the svn subdirectory
    chmod -R ug+rw,o+r svn
    # git svn also leaves behind ref turds that end with @nnn
    # We get rid of them now
    git for-each-ref --format='%(objectname) %(refname)' |
    {
      while read -r sha1 ref; do
        case "$ref" in
          ?*@[1-9]|?*@[1-9][0-9]|?*@[1-9][0-9][0-9]|?*@[1-9][0-9][0-9][0-9]|\
          ?*@[1-9][0-9][0-9][0-9][0-9]|?*@[1-9][0-9][0-9][0-9][0-9][0-9]|\
          ?*@[1-9][0-9][0-9][0-9][0-9][0-9][0-9]|\
          ?*@[1-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9])
            git update-ref -d "$ref"
            ;;
        esac
      done
    }
    unset GIT_ASKPASS_PASSWORD
    ;;
  darcs://*)
    [ -n "$cfg_mirror_darcs" ] || { echo "Mirroring darcs is disabled" >&2; exit 1; }
    httpurl="http://${url#darcs://}"
    git_darcs_fetch "$httpurl"
    ;;
  bzr://*)
    [ -n "$cfg_mirror_bzr" ] || { echo "Mirroring bzr is disabled" >&2; exit 1; }
    # we just remove bzr:// here, a typical bzr url is just
    # "lp:foo"
    bzrurl="${url#bzr://}"
    git_bzr_fetch "$bzrurl"
    ;;
  hg+http://* | hg+https://*)
    [ -n "$cfg_mirror_hg" ] || { echo "Mirroring hg is disabled" >&2; exit 1; }
    # We just remove hg+ here, so hg+http://... becomes http://...
    hgurl="${url#hg+}"
    # Perform the initial hg clone
    hg clone -U "$hgurl" "$(pwd)/repo.hg"
    # Do the fast-export | fast-import
    git_hg_fetch
    ;;
  *)
    # We manually add remote.origin.url and remote.origin.fetch
    # to simulate a `git remote add --mirror=fetch` since that's
    # not available until Git 1.7.5 and this way we guarantee we
    # always get exactly the intended configuration and nothing else.
    git config remote.origin.url "$url"
    git config remote.origin.fetch "+refs/*:refs/*"
    # Set the correct HEAD symref by using ls-remote first
    GIT_SSL_NO_VERIFY=1 GIT_TRACE_PACKET=1 git ls-remote origin >.refs-temp 2>.pkts-temp || \
    {
      # Since everything was redirected, on failure there'd be no output,
      # so let's make some failure output
      cat .pkts-temp
      echo ""
      echo "git ls-remote \"$url\" failed"
      exit 1
    }
    # Compensate for git() {} side effects
    unset GIT_TRACE_PACKET
    # If the server is running at least Git 1.8.4.3 then it will send us the actual
    # symref for HEAD.  If we are running at least Git 1.7.5 then we can snarf that
    # out of the packet trace data.
    if [ -s .refs-temp ]; then
      # Nothing to do unless the remote repository has at least 1 ref
      # See if we got a HEAD ref
      head="$(LC_ALL=C grep -E "^$octet20[ $tab]+HEAD\$" <.refs-temp | LC_ALL=C awk '{print $1}')"
      # If the remote has HEAD set to a symbolic ref that does not exist
      # then we will not receive a HEAD ref in the ls-remote output
      headref=
      showheadwarn=
      symrefcap=
      if [ -n "$head" ]; then
        symrefcap="$(LC_ALL=C sed -ne <.pkts-temp \
          "/packet:.*git<.*[ $tab]symref="'HEAD:refs\/heads\/'"[^ $tab]/\
          {s/^.*[ $tab]symref="'HEAD:\(refs\/heads\/'"[^ $tab][^ $tab]*"'\).*$/\1/;p;}')"
        # prefer $symrefcap (refs/heads/master if no $symrefcap) if it
        # matches HEAD otherwise take the first refs/heads/... match
        matchcnt=0
        while read -r ref; do
          [ -n "$ref" ] || continue
          matchcnt=$(( matchcnt + 1 ))
          if [ -z "$headref" ] || [ "$ref" = "${symrefcap:-refs/heads/master}" ]; then
            headref="$ref"
          fi
          if [ "$headref" = "${symrefcap:-refs/heads/master}" ] && [ "$matchcnt" -gt 1 ]; then
            break
          fi
        done <<EOT
$(LC_ALL=C grep -E "^$head[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp | \
  LC_ALL=C awk '{print $2}')
EOT
        # Warn if there was more than one match and $symrefcap is empty
        # or $symrefcap is not the same as $headref since our choice might
        # differ from the source repository's HEAD
        if [ "$matchcnt" -ge 1 ] && [ "$symrefcap" != "$headref" ] &&
           { [ -n "$symrefcap" ] || [ "$matchcnt" -gt 1 ]; }; then
          showheadwarn=1
        fi
      fi
      if [ -z "$headref" ]; then
        # If we still don't have a HEAD ref then prefer refs/heads/master
        # if it exists otherwise take the first refs/heads/...
        # We do not support having a detached HEAD.
        # We always warn now because we will be setting HEAD differently
        # than the source repository had HEAD set
        showheadwarn=1
        while read -r ref; do
          [ -n "$ref" ] || continue
          if [ -z "$headref" ] || [ "$ref" = "refs/heads/master" ]; then
            headref="$ref"
          fi
          if [ "$headref" = "refs/heads/master" ]; then
            break
          fi
        done <<EOT
$(LC_ALL=C grep -E "^$octet20[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp | \
  LC_ALL=C awk '{print $2}')
EOT
      fi
      # If we STILL do not have a HEAD ref (perhaps the source repository
      # contains only tags) then use refs/heads/master.  It will be invalid
      # but is no worse than we used to do by default and we'll warn about
      # it.  We do not support a HEAD symref to anything other than refs/heads/...
      [ -n "$headref" ] || headref="refs/heads/master"
      git symbolic-ref HEAD "$headref"
      GIT_SSL_NO_VERIFY=1 git remote update --prune
    else
      warnempty=1
      git symbolic-ref HEAD "refs/heads/master"
    fi
    rm -f .refs-temp .pkts-temp
    ;;
esac
# The objects subdirectories permissions must be updated now.
# In the case of a dumb http clone, the permissions will not be correct
# (missing group write) despite the core.sharedrepository=1 setting!
# The objects themselves seem to have the correct permissions.
# This problem appears to have been fixed in the most recent git versions.
perms=g+w
[ "$cfg_permission_control" != "Hooks" ] || perms=go+w
# find runs chmod itself so no directory name is ever word-split; any
# failure here remains best effort
find objects -maxdepth 1 -type d -exec chmod "$perms" {} + 2>/dev/null || :

# We may have just cloned a lot of refs and they will all be
# individual files at this point.  Let's pack them now so we
# can have better performance right from the start.
git pack-refs --all

# Initialize gitweb.lastreceive, gitweb.lastchange and info/lastactivity
# (one date call so both settings carry exactly the same timestamp)
_clone_stamp="$(date '+%a, %d %b %Y %T %z')"
git config gitweb.lastreceive "$_clone_stamp"
git config gitweb.lastchange "$_clone_stamp"
git for-each-ref --sort=-committerdate --format='%(committerdate:iso8601)' \
  --count=1 refs/heads > info/lastactivity || :
! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :

# Don't leave a multi-megabyte useless FETCH_HEAD behind
rm -f FETCH_HEAD

# The rest
echo "Final touches..."
git update-server-info
# The clone has succeeded; exiting no longer counts as a failure
trap "" EXIT

# run gc now unless the clone is empty
if [ -z "$warnempty" ]; then
  git config --unset gitweb.lastgc 2>/dev/null || :
  rm -f .delaygc .allowgc
fi

# Optional paragraphs for the completion mail; each stays empty when it
# does not apply
emptynote=
if [ -n "$warnempty" ]; then
  emptynote="
WARNING: You have mirrored an empty repository.
"
fi
headnote=
if [ -n "$showheadwarn" ] && [ -n "$headref" ]; then
  headnote="
NOTE: HEAD has been set to a symbolic ref to \"$headref\".
      Use the \"Project settings\" link to choose a different HEAD symref.
"
fi
sizenote=
if is_gfi_mirror; then
  sizenote="
NOTE: Since this is a mirror of a non-Git source, the initial repository
      size may be somewhat larger than necessary. This will be corrected
      shortly. If you intend to clone this repository you may want to
      wait up to 1 hour before doing so in order to receive the more
      compact final size.
"
fi
# A failing mail command must not fail the clone
[ -z "$mailaddrs" ] ||
mail -s "[$cfg_name] $proj clone completed" "$mailaddrs" <<EOT || :
Congratulations! The clone of project $proj just completed.

  * Source URL: $url
  * GitWeb interface: $cfg_gitweburl/$proj.git
  * Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')
$emptynote$headnote$sizenote
Have a lot of fun.
EOT

echo "Mirroring finished successfuly!"
# In case this is a re-mirror, lastgc could have been set already so clear it now
git config --unset gitweb.lastgc || :
# -f: the EXIT trap is already cleared, so a missing marker must not abort
# the script (set -e) before the closing @OVER@ line is printed
rm -f .clone_in_progress
echo "$sizenote@OVER@"