mirrors: include refs/top-bases/* in clean mirrors
[girocco.git] / taskd / clone.sh
blobd7ad4024c161308cb6d266f2d997e7d3ac0992df
#!/bin/sh
#
# Performs the initial clone of a mirrored project.
# Invoked from taskd/taskd.pl

# Shared helper library; the @basedir@ placeholder is kept exactly as found
# (presumably substituted at install time -- confirm against the build).
. @basedir@/shlib.sh

# Stop at the first command failure that is not explicitly handled
set -e

# Group-writable files by default; fully open when permission control is
# done by hooks.
if [ "$cfg_permission_control" = "Hooks" ]; then
	umask 000
else
	umask 002
fi
clean_git_env
# darcs fast-export | git fast-import with error handling
#
# Arguments: $1 - URL of the darcs repository to export
# Returns:   0 only when both the exporter and the importer exited with 0
#
# The body runs in a subshell, so the descriptor changes made here do not
# leak into the caller.
#
# How the two exit codes are collected: fd 3 is first pointed at the real
# stdout.  Inside the command substitution fd 1 and fd 3 are swapped, so
# ordinary output still reaches the real stdout while anything written to
# fd 3 is captured.  Each pipeline stage runs its tool with fd 3 closed,
# then writes the tool's exit code to fd 3; the two captured lines are read
# back into _err1 and _err2.
git_darcs_fetch() (
	set_utf8_locale
	_err1=
	_err2=
	exec 3>&1
	# The here-doc lines are kept at column 0 so that the terminator does
	# not depend on tab indentation being preserved.
	{ read -r _err1 || :; read -r _err2 || :; } <<EOT
$(
	exec 4>&3 3>&1 1>&4 4>&-
	{
		_e1=0
		"$cfg_basedir"/bin/darcs-fast-export \
			--export-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
		echo "$_e1" >&3
	} |
	{
		_e2=0
		git fast-import \
			--export-marks="$(pwd)/gfi-marks" \
			--export-pack-edges="$(pwd)/gfi-packs" \
			--force 3>&- || _e2=$?
		echo "$_e2" >&3
	}
)
EOT
	exec 3>&-
	# The status of this list is the status of the function
	[ "$_err1" = 0 ] && [ "$_err2" = 0 ]
)
# bzr fast-export | git fast-import with error handling
#
# Arguments: $1 - bzr location to export
# Returns:   0 only when both the exporter and the importer exited with 0
#
# The body runs in a subshell, so the BZR_LOG export and the descriptor
# changes made here do not leak into the caller.
#
# How the two exit codes are collected: fd 3 is first pointed at the real
# stdout.  Inside the command substitution fd 1 and fd 3 are swapped, so
# ordinary output still reaches the real stdout while anything written to
# fd 3 is captured.  Each pipeline stage runs its tool with fd 3 closed,
# then writes the tool's exit code to fd 3; the two captured lines are read
# back into _err1 and _err2.
git_bzr_fetch() (
	set_utf8_locale
	BZR_LOG=/dev/null
	export BZR_LOG
	_err1=
	_err2=
	exec 3>&1
	# The here-doc lines are kept at column 0 so that the terminator does
	# not depend on tab indentation being preserved.
	{ read -r _err1 || :; read -r _err2 || :; } <<EOT
$(
	exec 4>&3 3>&1 1>&4 4>&-
	{
		_e1=0
		bzr fast-export --plain \
			--export-marks="$(pwd)/bfe-marks" "$1" 3>&- || _e1=$?
		echo "$_e1" >&3
	} |
	{
		_e2=0
		git fast-import \
			--export-marks="$(pwd)/gfi-marks" \
			--export-pack-edges="$(pwd)/gfi-packs" \
			--force 3>&- || _e2=$?
		echo "$_e2" >&3
	}
)
EOT
	exec 3>&-
	# The status of this list is the status of the function
	[ "$_err1" = 0 ] && [ "$_err2" = 0 ]
)
# Mails a clone-failure notice for $proj, followed by the contents of
# .clonelog (the middle is elided when the log is longer than 203 lines).
#
# Globals read: proj, url, cfg_admin, cfg_admincc, cfg_name, cfg_webadmurl
# Recipients:   the project owner from config_get, plus $cfg_admin when
#               $cfg_admincc is set to something other than "0"
# Side effects: clears the EXIT trap and redirects stdout and stderr of the
#               current shell to /dev/null
# Returns:      0 always; a failure to send is ignored
send_clone_failed() {
	trap "" EXIT
	# We must now close the .clonelog file that is open on stdout and stderr
	exec >/dev/null 2>&1
	failaddrs="$(config_get owner || :)"
	if [ -n "$cfg_admincc" ] && [ "$cfg_admincc" != "0" ] && [ -n "$cfg_admin" ]; then
		failaddrs="${failaddrs:+$failaddrs,}$cfg_admin"
	fi
	# Nobody to notify
	[ -n "$failaddrs" ] || return 0
	{
		cat <<EOT
Condolences. The clone of project $proj just failed.

* Source URL: $url
* Project settings: $cfg_webadmurl/editproj.cgi?name=$(printf '%s\n' "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')

The project settings link may be used to adjust the settings
and restart the clone in order to try the clone again.
EOT
		if [ -f .clonelog ] && [ -r .clonelog ]; then
			echo ""
			echo "Log follows:"
			echo ""
			# Arithmetic expansion strips any padding wc adds and
			# turns an empty result into 0
			loglines=$(( $(LC_ALL=C wc -l <.clonelog) + 0 ))
			if [ "$loglines" -le 203 ]; then
				cat .clonelog
			else
				# First and last 100 lines with a marker in between
				head -n 100 .clonelog
				echo ""
				echo "[ ... elided $(( $loglines - 200 )) middle lines ... ]"
				echo ""
				tail -n 100 .clonelog
			fi
		fi
	} | mail -s "[$cfg_name] $proj clone failed" "$failaddrs" || :
}
# removes all leftovers from a previous failed clone attempt
#
# Works on the repository in the current directory: drops stale remote
# configuration, temporary import/export files and every existing ref, and
# points HEAD back at refs/heads/master.
cleanup_failed_clone() {

	# Remove any left-over svn-remote.svn or remote.origin config
	git config --remove-section svn-remote.svn 2>/dev/null || :
	git config --remove-section remote.origin 2>/dev/null || :

	# If there is a remote-template.origin section, pre-seed the
	# remote.origin section with its contents.  read -r is used so that
	# backslashes in a configured value are copied unchanged.
	git config --get-regexp '^remote-template\.origin\..' |
	while read -r name value; do
		if [ -n "$name" ] && [ -n "$value" ]; then
			git config "remote${name#remote-template}" "$value"
		fi
	done

	# Any pre-existing FETCH_HEAD from a previous clone failed or not is
	# now garbage to be removed
	rm -f FETCH_HEAD

	# Remove any left-over svn dir from a previous failed attempt
	rm -rf svn

	# Remove any left-over .darcs dirs from a previous failed attempt
	rm -rf -- *.darcs

	# Remove any left-over repo.hg dir from a previous failed attempt
	rm -rf repo.hg

	# Remove any left-over import/export/temp files from a previous failed attempt
	rm -f bfe-marks dfe-marks hg2git-heads hg2git-mapping hg2git-marks* hg2git-state \
		gfi-marks gfi-packs .pkts-temp .refs-temp

	# We want a gc right after the clone, so re-enable that just in case.
	# There's a potential race where we could add it and gc.sh could remove
	# it, but we'll reunset lastgc just before we remove .delaygc at the end.
	[ -e .delaygc ] || >.delaygc
	git config --unset gitweb.lastgc 2>/dev/null || :

	# Remove all pre-existing refs
	rm -f packed-refs
	eval "$(git for-each-ref --shell --format='git update-ref -d %(refname) || :')" 2>/dev/null || :

	# The initial state before a clone starts has HEAD as a symbolic-ref to master
	git symbolic-ref HEAD refs/heads/master

	# HEAD is no longer "ok"
	git config --unset girocco.headok 2>/dev/null || :

	# We, perhaps, ought to remove any packs/loose objects now, but the next gc
	# will get rid of any extras.  Also, if we're recloning the same thing, any
	# preexisting packs/loose objects containing what we're recloning will only
	# speed up the reclone by avoiding some disk writes.  So we don't kill them.
}
# Main script: $1 names the project to clone (a trailing .git is accepted)
proj="${1%.git}"
cd "$cfg_reporoot/$proj.git"
bang_reset

! [ -e .delaygc ] || >.allowgc || :

# Until the clone has completed, any exit marks it as failed and mails a notice
trap "echo '@OVER@'; touch .clone_failed; send_clone_failed" EXIT
echo "Project: $proj"
echo ""
[ -n "$cfg_mirror" ] || { echo "Mirroring is disabled" >&2; exit 1; }
url="$(config_get baseurl || :)"
# Reject an empty URL and one containing a space or a tab.
# NOTE(review): the two whitespace alternatives were identical in the text
# this was restored from; the second is taken to have been a tab -- confirm.
url_tab="$(printf '\t')"
case "$url" in *" "*|*"$url_tab"*|"")
	echo "Bad mirror URL (\"$url\")"
	exit 1
esac

cleanup_failed_clone

# Record original mirror type for use by update.sh
mirror_type="$(get_url_mirror_type "$url")"
git config girocco.mirrortype "$mirror_type"

echo "Mirroring from URL \"$url\""
echo ""

if [ "$cfg_project_owners" = "source" ]; then
	config_set owner "$(stat -c %U "$url" 2>/dev/null)"
fi

# Success notices go to the owner and, when configured, the admin
mailaddrs="$(config_get owner || :)"
if [ -n "$cfg_admin" ]; then
	mailaddrs="${mailaddrs:+$mailaddrs,}$cfg_admin"
fi
# Initial mirror
#
# Dispatches on the URL scheme: svn, darcs, bzr and hg sources are imported
# with their own tools, everything else is fetched with git itself.
# Leaves headref, showheadwarn and warnempty set for the notification step.
echo "Initiating mirroring..."
headref=
showheadwarn=
warnempty=
case "$url" in
	svn://* | svn+http://* | svn+https://* | svn+file://* | svn+ssh://*)
		[ -n "$cfg_mirror_svn" ] || { echo "Mirroring svn is disabled" >&2; exit 1; }
		# We just remove svn+ here, so svn+http://... becomes http://...
		# We also remove a trailing '/' to match what git-svn will do
		case "$url" in svn+ssh://*) svnurl="$url";; *) svnurl="${url#svn+}";; esac
		svnurl="${svnurl%/}"
		# Use an 'anonsvn' username as is commonly used for anonymous svn
		# Use an 'anonsvn' password as is commonly used for anonymous svn
		GIT_ASKPASS_PASSWORD=anonsvn
		export GIT_ASKPASS_PASSWORD
		# We require svn info to succeed on the URL otherwise it's
		# simply not a valid URL and without using -s on the init it
		# will not otherwise be tested until the fetch
		svn --non-interactive --username anonsvn --password anonsvn info "$svnurl" > /dev/null
		# We initially use -s for the init which will possibly shorten
		# the URL.  However, the shortening can fail if a password is
		# not required for the longer version but is for the shorter,
		# so try again without -s if the -s version fails.
		# We must use GIT_DIR=. here or ever so "helpful" git-svn will
		# create a .git subdirectory!
		GIT_DIR=. git svn init --username=anonsvn --prefix "" -s "$svnurl" < /dev/null || \
		GIT_DIR=. git svn init --username=anonsvn --prefix "" "$svnurl" < /dev/null
		# We need to remember this url so we can detect changes because
		# ever so "helpful" git-svn may shorten it!
		config_set svnurl "$svnurl"
		# At this point, since we asked for a standard layout (-s) git-svn
		# may have been "helpful" and adjusted our $svnurl to a prefix and
		# then glued the removed suffix onto the front of any svn-remote.svn.*
		# config items.  We could avoid this by not using the '-s' option
		# but then we might not get all the history.  If, for example, we
		# are cloning an http://svn.example.com/repos/public repository that
		# early in its history moved trunk => public/trunk we would miss that
		# earlier history without allowing the funky shorten+prefix behavior.
		# So we read back the svn-remote.svn.fetch configuration and compute
		# the prefix.  This way we are sure to get the correct prefix.
		gitsvnurl="$(git config --get svn-remote.svn.url || :)"
		gitsvnfetch="$(git config --get-all svn-remote.svn.fetch | tail -n 1 || :)"
		gitsvnprefix="${gitsvnfetch%%:*}"
		gitsvnsuffix="${gitsvnprefix##*/}"
		# The suffix is quoted so that it is removed literally
		gitsvnprefix="${gitsvnprefix%"$gitsvnsuffix"}"
		# Ask git-svn to store everything in the normal non-remote
		# locations being careful to use the correct prefix
		git config --replace-all svn-remote.svn.fetch "${gitsvnprefix}trunk:refs/heads/master"
		git config --replace-all svn-remote.svn.branches "${gitsvnprefix}branches/*:refs/heads/*"
		git config --replace-all svn-remote.svn.tags "${gitsvnprefix}tags/*:refs/tags/*"
		# look for additional non-standard directories to fetch
		# check for standard layout at the same time
		foundstd=
		foundfile=
		svn_tab="$(printf '\t')"
		{ svn --non-interactive --username anonsvn --password anonsvn ls "$gitsvnurl/${gitsvnprefix}" 2>/dev/null || :; } | \
		{ while read -r file; do case $file in
			# skip the already-handled standard ones and any with a space or tab
			*' '*|*"$svn_tab"*) :;;
			trunk/|branches/|tags/) foundstd=1;;
			# only fetch extra directories from the $svnurl root (not any files)
			*?/) git config --add svn-remote.svn.fetch \
				"${gitsvnprefix}${file%/}:refs/heads/${file%/}";;
			*?) foundfile=1;;
		esac; done
		# if files found and no standard directories present use a simpler layout
		if [ -z "$foundstd" ] && [ -n "$foundfile" ]; then
			git config --unset svn-remote.svn.branches
			git config --unset svn-remote.svn.tags
			git config --replace-all svn-remote.svn.fetch ':refs/heads/master'
		fi; }
		# remember the starting time so we can easily combine fetched loose objects
		# we sleep for 1 second after creating .svnpack to make sure all objects are newer
		if ! [ -e .svnpack ]; then
			rm -f .svnpack
			>.svnpack
			sleep 1
		fi
		# Again, be careful to use GIT_DIR=. here or else new .git subdirectory!
		GIT_DIR=. git svn fetch --log-window-size="$var_log_window_size" --username=anonsvn --quiet < /dev/null
		# git svn does not preserve group permissions in the svn subdirectory
		chmod -R ug+rw,o+r svn
		# git svn also leaves behind ref turds that end with @nnn
		# We get rid of them now
		git for-each-ref --format='%(objectname) %(refname)' | \
		{ while read -r sha1 ref; do
			case "$ref" in
				?*@[1-9]|?*@[1-9][0-9]|?*@[1-9][0-9][0-9]|?*@[1-9][0-9][0-9][0-9]|\
				?*@[1-9][0-9][0-9][0-9][0-9]|?*@[1-9][0-9][0-9][0-9][0-9][0-9]|\
				?*@[1-9][0-9][0-9][0-9][0-9][0-9][0-9]|\
				?*@[1-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9])
					git update-ref -d "$ref"
			esac
		done; }
		unset GIT_ASKPASS_PASSWORD
		;;
	darcs://*)
		[ -n "$cfg_mirror_darcs" ] || { echo "Mirroring darcs is disabled" >&2; exit 1; }
		httpurl="http://${url#darcs://}"
		git_darcs_fetch "$httpurl"
		;;
	bzr://*)
		[ -n "$cfg_mirror_bzr" ] || { echo "Mirroring bzr is disabled" >&2; exit 1; }
		# we just remove bzr:// here, a typical bzr url is just
		# "lp:foo"
		bzrurl="${url#bzr://}"
		git_bzr_fetch "$bzrurl"
		;;
	hg+http://* | hg+https://* | hg+file://* | hg+ssh://*)
		[ -n "$cfg_mirror_hg" ] || { echo "Mirroring hg is disabled" >&2; exit 1; }
		# We just remove hg+ here, so hg+http://... becomes http://...
		hgurl="${url#hg+}"
		# Perform the initial hg clone
		hg clone -U "$hgurl" "$(pwd)/repo.hg"
		# Do the fast-export | fast-import
		git_hg_fetch
		;;
	*)
		# We manually add remote.origin.url and remote.origin.fetch
		# to simulate a `git remote add --mirror=fetch` since that's
		# not available until Git 1.7.5 and this way we guarantee we
		# always get exactly the intended configuration and nothing else.
		git config remote.origin.url "$url"
		if ! is_gfi_mirror_url "$url" && [ "$(git config --bool girocco.cleanmirror 2>/dev/null || :)" = "true" ]; then
			git config --replace-all remote.origin.fetch "+refs/heads/*:refs/heads/*"
			git config --add remote.origin.fetch "+refs/tags/*:refs/tags/*"
			git config --add remote.origin.fetch "+refs/notes/*:refs/notes/*"
			git config --add remote.origin.fetch "+refs/top-bases/*:refs/top-bases/*"
			git config --bool girocco.lastupdateclean true
		else
			git config --replace-all remote.origin.fetch "+refs/*:refs/*"
			git config --bool girocco.lastupdateclean false
		fi
		# Set the correct HEAD symref by using ls-remote first
		if ! GIT_SSL_NO_VERIFY=1 GIT_TRACE_PACKET=1 git ls-remote origin >.refs-temp 2>.pkts-temp; then
			# Since everything was redirected, on failure there'd be no output,
			# so let's make some failure output
			cat .pkts-temp
			echo ""
			echo "git ls-remote \"$url\" failed"
			exit 1
		fi
		# Compensate for git() {} side effects
		unset GIT_TRACE_PACKET
		# If the server is running at least Git 1.8.4.3 then it will send us the actual
		# symref for HEAD.  If we are running at least Git 1.7.5 then we can snarf that
		# out of the packet trace data.
		if [ -s .refs-temp ]; then
			# Nothing to do unless the remote repository has at least 1 ref
			# See if we got a HEAD ref
			head="$(LC_ALL=C grep -E "^$octet20[ $tab]+HEAD\$" <.refs-temp | LC_ALL=C awk '{print $1}')"
			# If the remote has HEAD set to a symbolic ref that does not exist
			# then we will not receive a HEAD ref in the ls-remote output
			headref=
			showheadwarn=
			symrefcap=
			if [ -n "$head" ]; then
				symrefcap="$(LC_ALL=C sed -ne <.pkts-temp \
					"/packet:.*git<.*[ $tab]symref="'HEAD:refs\/heads\/'"[^ $tab]/\
					{s/^.*[ $tab]symref="'HEAD:\(refs\/heads\/'"[^ $tab][^ $tab]*"'\).*$/\1/;p;}')"
				# prefer $symrefcap (refs/heads/master if no $symrefcap) if it
				# matches HEAD otherwise take the first refs/heads/... match
				# (the here-doc lines stay at column 0 so the terminator
				# does not depend on tab indentation being preserved)
				matchcnt=0
				while read -r ref; do
					[ -n "$ref" ] || continue
					matchcnt=$(( $matchcnt + 1 ))
					if [ -z "$headref" ] || [ "$ref" = "${symrefcap:-refs/heads/master}" ]; then
						headref="$ref"
					fi
					if [ "$headref" = "${symrefcap:-refs/heads/master}" ] && [ "$matchcnt" -gt 1 ]; then
						break
					fi
				done <<EOT
$(LC_ALL=C grep -E "^$head[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp | \
	LC_ALL=C awk '{print $2}')
EOT
				# Warn if there was more than one match and $symrefcap is empty
				# or $symrefcap is not the same as $headref since our choice might
				# differ from the source repository's HEAD
				if [ "$matchcnt" -ge 1 ] && [ "$symrefcap" != "$headref" ] && \
					{ [ -n "$symrefcap" ] || [ "$matchcnt" -gt 1 ]; }; then
					showheadwarn=1
				fi
			fi
			if [ -z "$headref" ]; then
				# If we still don't have a HEAD ref then prefer refs/heads/master
				# if it exists otherwise take the first refs/heads/...
				# We do not support having a detached HEAD.
				# We always warn now because we will be setting HEAD differently
				# than the source repository had HEAD set
				showheadwarn=1
				while read -r ref; do
					[ -n "$ref" ] || continue
					if [ -z "$headref" ] || [ "$ref" = "refs/heads/master" ]; then
						headref="$ref"
					fi
					if [ "$headref" = "refs/heads/master" ]; then
						break
					fi
				done <<EOT
$(LC_ALL=C grep -E "^$octet20[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp | \
	LC_ALL=C awk '{print $2}')
EOT
			fi
			# If we STILL do not have a HEAD ref (perhaps the source repository
			# contains only tags) then use refs/heads/master.  It will be invalid
			# but is no worse than we used to do by default and we'll warn about
			# it.  We do not support a HEAD symref to anything other than refs/heads/...
			[ -n "$headref" ] || headref="refs/heads/master"
			git symbolic-ref HEAD "$headref"
			pruneopt=--prune
			[ "$(git config --bool fetch.prune 2>/dev/null || :)" != "false" ] || pruneopt=
			git_add_config 'fetch.unpackLimit=1'
			# Note the git config documentation is wrong
			# transfer.unpackLimit, if set, overrides fetch.unpackLimit
			git_add_config 'transfer.unpackLimit=1'
			# remember the starting time so we can easily detect new packs for fast-import mirrors
			# we sleep for 1 second after creating .gfipack to make sure all packs are newer
			if is_gfi_mirror_url "$url" && [ ! -e .gfipack ]; then
				rm -f .gfipack
				>.gfipack
				sleep 1
			fi
			# $pruneopt is left unquoted on purpose: it may be empty
			GIT_SSL_NO_VERIFY=1 git remote update $pruneopt
			if [ -e .gfipack ] && is_gfi_mirror_url "$url"; then
				find objects/pack -type f -newer .gfipack -name "pack-$octet20.pack" -print >>gfi-packs
				rm -f .gfipack
			fi
		else
			warnempty=1
			git symbolic-ref HEAD "refs/heads/master"
		fi
		rm -f .refs-temp .pkts-temp
		;;
esac
# The objects subdirectories permissions must be updated now.
# In the case of a dumb http clone, the permissions will not be correct
# (missing group write) despite the core.sharedrepository=1 setting!
# The objects themselves seem to have the correct permissions.
# This problem appears to have been fixed in the most recent git versions.
perms=g+w
[ "$cfg_permission_control" != "Hooks" ] || perms=go+w
# find hands the directories to chmod directly, so no word splitting of
# the directory list is involved; failures are deliberately ignored
find objects -maxdepth 1 -type d -exec chmod "$perms" {} + 2>/dev/null || :

# We may have just cloned a lot of refs and they will all be
# individual files at this point.  Let's pack them now so we
# can have better performance right from the start.
git pack-refs --all

# Initialize gitweb.lastreceive, gitweb.lastchange and info/lastactivity
# (one timestamp is taken so that both config values agree)
clone_stamp="$(date '+%a, %d %b %Y %T %z')"
git config gitweb.lastreceive "$clone_stamp"
git config gitweb.lastchange "$clone_stamp"
git for-each-ref --sort=-committerdate --format='%(committerdate:iso8601)' \
	--count=1 refs/heads > info/lastactivity || :
! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :

# Don't leave a multi-megabyte useless FETCH_HEAD behind
rm -f FETCH_HEAD

# Last ditch attempt to get a valid HEAD for a non-git source
check_and_set_head || :

# The rest
echo "Final touches..."
git update-server-info
# From here on an exit no longer reports the clone as failed
trap "" EXIT

# run gc now unless the clone is empty
if [ -z "$warnempty" ]; then
	git config --unset gitweb.lastgc 2>/dev/null || :
	rm -f .delaygc .allowgc
fi
# Build the optional notes for the completion mail, send it, then finish.
# NOTE(review): the note texts below are reproduced as found; any leading
# spaces that once aligned their continuation lines are not recoverable
# from the text this was restored from -- confirm against the original.
emptynote=
if [ -n "$warnempty" ]; then
	emptynote="
WARNING: You have mirrored an empty repository.
"
fi
headnote=
if [ -n "$showheadwarn" ] && [ -n "$headref" ]; then
	headnote="
NOTE: HEAD has been set to a symbolic ref to \"$headref\".
Use the \"Project settings\" link to choose a different HEAD symref.
"
fi
sizenote=
# Uses the same URL-based check as the rest of this script.
# NOTE(review): this was a bare "is_gfi_mirror" call with no arguments,
# unlike every other check in the file; if that name is not defined the
# negated test hides the failure and the note is never set -- confirm.
if is_gfi_mirror_url "$url"; then
	sizenote="
NOTE: Since this is a mirror of a non-Git source, the initial repository
size may be somewhat larger than necessary. This will be corrected
shortly. If you intend to clone this repository you may want to
wait up to 1 hour before doing so in order to receive the more
compact final size.
"
fi
# A failure to send the mail is ignored
if [ -n "$mailaddrs" ]; then
	mail -s "[$cfg_name] $proj clone completed" "$mailaddrs" <<EOT || :
Congratulations! The clone of project $proj just completed.

* Source URL: $url
* GitWeb interface: $cfg_gitweburl/$proj.git
* Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')
$emptynote$headnote$sizenote
Have a lot of fun.
EOT
fi

# NOTE(review): the spelling of this log message is left unchanged in case
# anything that reads the log matches on it.
echo "Mirroring finished successfuly!"
# In case this is a re-mirror, lastgc could have been set already so clear it now
git config --unset gitweb.lastgc || :
rm .clone_in_progress
echo "$sizenote@OVER@"