fetch: genericize git-svn fetch packing mechanism
[girocco/readme.git] / taskd / clone.sh
blob297b304819d677d9046a65adeb28dcd34bc13c97
#!/bin/sh
#
# Invoked from taskd/taskd.pl
#
# Performs the initial clone (mirror) of one project repository.

# Pull in the shared helper library; "@basedir@" is a placeholder that is
# expected to be substituted with the real install path before this runs.
. @basedir@/shlib.sh

# Abort on the first unhandled command failure.
set -e

# Group-writable files by default; world-writable as well when permission
# control is delegated to hooks.
umask 002
[ "$cfg_permission_control" != "Hooks" ] || umask 000
clean_git_env
# git_darcs_fetch URL
#
# darcs fast-export | git fast-import with error handling
#
# Pipes "$cfg_basedir/bin/darcs-fast-export URL" into "git fast-import" and
# returns 0 only if BOTH sides of the pipeline exited with status 0.  Marks
# and pack-edge files (dfe-marks, gfi-marks, gfi-packs) are written into the
# current directory.
#
# The function body is a subshell, so the fd juggling and whatever
# set_utf8_locale changes (presumably the locale environment -- confirm in
# shlib.sh) do not leak into the caller.
#
# How both exit statuses are collected without "pipefail":
#   * fd 3 is first pointed at the caller's stdout.
#   * Inside the command substitution fds 1 and 3 are swapped, so that
#     ordinary output still reaches the caller's stdout while anything
#     written to fd 3 is captured by the command substitution.
#   * Each side of the pipeline runs its command with fd 3 closed and then
#     echoes its exit status to fd 3.
#   * The two captured status lines are fed through a here-document to the
#     "read" commands, which therefore run in this (sub)shell and can set
#     _err1 and _err2.
# The here-document body and its terminator are deliberately kept at column
# zero so they do not depend on tab-only indentation.
git_darcs_fetch() (
    set_utf8_locale
    _err1=
    _err2=
    exec 3>&1
    { read -r _err1 || :; read -r _err2 || :; } <<-EOT
$(
    exec 4>&3 3>&1 1>&4 4>&-
    {
        _e1=0
        "$cfg_basedir"/bin/darcs-fast-export \
            --export-marks="$(pwd)/dfe-marks" "$1" 3>&- || _e1=$?
        echo "$_e1" >&3
    } |
    {
        _e2=0
        git fast-import \
            --export-marks="$(pwd)/gfi-marks" \
            --export-pack-edges="$(pwd)/gfi-packs" \
            --force 3>&- || _e2=$?
        echo "$_e2" >&3
    }
)
EOT
    exec 3>&-
    [ "$_err1" = 0 ] && [ "$_err2" = 0 ]
    return $?
)
# git_bzr_fetch URL
#
# bzr fast-export | git fast-import with error handling
#
# Pipes "bzr fast-export --plain URL" into "git fast-import" and returns 0
# only if BOTH sides of the pipeline exited with status 0.  Marks and
# pack-edge files (bfe-marks, gfi-marks, gfi-packs) are written into the
# current directory.
#
# The function body is a subshell, so the exported BZR_LOG=/dev/null, the fd
# juggling and whatever set_utf8_locale changes (presumably the locale
# environment -- confirm in shlib.sh) do not leak into the caller.
#
# How both exit statuses are collected without "pipefail":
#   * fd 3 is first pointed at the caller's stdout.
#   * Inside the command substitution fds 1 and 3 are swapped, so that
#     ordinary output still reaches the caller's stdout while anything
#     written to fd 3 is captured by the command substitution.
#   * Each side of the pipeline runs its command with fd 3 closed and then
#     echoes its exit status to fd 3.
#   * The two captured status lines are fed through a here-document to the
#     "read" commands, which therefore run in this (sub)shell and can set
#     _err1 and _err2.
# The here-document body and its terminator are deliberately kept at column
# zero so they do not depend on tab-only indentation.
git_bzr_fetch() (
    set_utf8_locale
    BZR_LOG=/dev/null
    export BZR_LOG
    _err1=
    _err2=
    exec 3>&1
    { read -r _err1 || :; read -r _err2 || :; } <<-EOT
$(
    exec 4>&3 3>&1 1>&4 4>&-
    {
        _e1=0
        bzr fast-export --plain \
            --export-marks="$(pwd)/bfe-marks" "$1" 3>&- || _e1=$?
        echo "$_e1" >&3
    } |
    {
        _e2=0
        git fast-import \
            --export-marks="$(pwd)/gfi-marks" \
            --export-pack-edges="$(pwd)/gfi-packs" \
            --force 3>&- || _e2=$?
        echo "$_e2" >&3
    }
)
EOT
    exec 3>&-
    [ "$_err1" = 0 ] && [ "$_err2" = 0 ]
    return $?
)
# send_clone_failed
#
# Mails a "clone failed" notice for project $proj.  It is installed as part
# of the EXIT trap by the main script, so it must never fail itself: every
# step is best-effort.
#
# Reads:   $proj $url $cfg_admincc $cfg_admin $cfg_webadmurl $cfg_gitweburl
#          $cfg_name, the project's "owner" setting (via config_get) and the
#          .clonelog file in the current directory.
# Effects: clears the EXIT trap, redirects stdout/stderr to /dev/null,
#          touches htmlcache/changed when htmlcache exists, and pipes the
#          message to mailref.  No mail is sent when there are no recipients.
#
# The log is included in full when it has at most 203 lines; otherwise only
# the first 100 and last 100 lines are sent with an elision marker between.
send_clone_failed() {
    trap "" EXIT
    # We must now close the .clonelog file that is open on stdout and stderr
    exec >/dev/null 2>&1
    ! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :
    failaddrs="$(config_get owner)" || :
    # Copy the admin on failures only when admincc is enabled and an admin
    # address is configured.
    [ -z "$cfg_admincc" ] || [ "$cfg_admincc" = "0" ] || [ -z "$cfg_admin" ] ||
    if [ -z "$failaddrs" ]; then failaddrs="$cfg_admin"; else failaddrs="$failaddrs,$cfg_admin"; fi
    [ -z "$failaddrs" ] ||
    {
        cat <<EOT
Condolences. The clone of project $proj just failed.

* Source URL: $url
* Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')

The project settings link may be used to adjust the settings
and restart the clone in order to try the clone again.
EOT
        if [ -f .clonelog ] && [ -r .clonelog ]; then
            echo ""
            echo "Log follows:"
            echo ""
            loglines=$(LC_ALL=C wc -l <.clonelog)
            # $loglines is intentionally unquoted: some wc implementations
            # pad the count with blanks and word splitting removes them.
            if [ $loglines -le 203 ]; then
                cat .clonelog
            else
                head -n 100 .clonelog
                echo ""
                echo "[ ... elided $(( $loglines - 200 )) middle lines ... ]"
                echo ""
                tail -n 100 .clonelog
            fi
        fi
    } | mailref "clone@$cfg_gitweburl/$proj.git" -s "[$cfg_name] $proj clone failed" "$failaddrs" || :
}
# cleanup_failed_clone
#
# removes all leftovers from a previous failed clone attempt
#
# Must be called with the project repository as the current directory.  It
# resets remote configuration, deletes stale import state and every ref, and
# points HEAD back at refs/heads/master.  Objects and packs are left alone.
cleanup_failed_clone() {

    # Remove any left-over svn-remote.svn or remote.origin config
    git config --remove-section svn-remote.svn 2>/dev/null || :
    git config --remove-section remote.origin 2>/dev/null || :

    # If there is a remote-template.origin section, pre-seed the
    # remote.origin section with its contents
    # ("read -r" keeps any backslashes in the configured values intact)
    git config --get-regexp '^remote-template\.origin\..' |
    while read -r name value; do
        if [ -n "$name" ] && [ -n "$value" ]; then
            git config "remote${name#remote-template}" "$value"
        fi
    done

    # Any pre-existing FETCH_HEAD from a previous clone failed or not is
    # now garbage to be removed
    rm -f FETCH_HEAD

    # Remove any left-over svn dir from a previous failed attempt
    rm -rf svn

    # Remove any left-over .darcs dirs from a previous failed attempt
    rm -rf *.darcs

    # Remove any left-over repo.hg dir from a previous failed attempt
    rm -rf repo.hg

    # Remove any left-over import/export/temp files from a previous failed attempt
    rm -f bfe-marks dfe-marks hg2git-heads hg2git-mapping hg2git-marks* hg2git-state \
        gfi-marks gfi-packs .pkts-temp .refs-temp

    # We want a gc right after the clone, so re-enable that just in case.
    # There's a potential race where we could add it and gc.sh could remove
    # it, but we'll reunset lastgc just before we remove .delaygc at the end.
    [ -e .delaygc ] || >.delaygc
    git config --unset gitweb.lastgc 2>/dev/null || :

    # Remove all pre-existing refs
    rm -f packed-refs
    git for-each-ref --format='delete %(refname)' | git_updateref_stdin 2>/dev/null || :

    # The initial state before a clone starts has HEAD as a symbolic-ref to master
    git symbolic-ref HEAD refs/heads/master

    # HEAD is no longer "ok"
    git config --unset girocco.headok 2>/dev/null || :

    # We, perhaps, ought to remove any packs/loose objects now, but the next gc
    # will get rid of any extras.  Also, if we're recloning the same thing, any
    # preexisting packs/loose objects containing what we're recloning will only
    # speed up the reclone by avoiding some disk writes.  So we don't kill them.

    # It's just remotely possible that a bunch of failures in a row could
    # create a big mess that just keeps growing and growing...
    # Trigger a .needsgc if that happens.
    check_and_set_needsgc
}
# ---- main script: setup -------------------------------------------------
# $1 is the project name, with or without a trailing ".git".
proj="${1%.git}"
cd "$cfg_reporoot/$proj.git"
bang_reset

! [ -e .delaygc ] || >.allowgc || :

# Until the clone completes, any exit marks the clone as failed, emits the
# '@OVER@' marker and sends the failure mail.
trap "echo '@OVER@'; touch .clone_failed; send_clone_failed" EXIT
echo "Project: $proj"
echo " Date: $(TZ=UTC date '+%Y-%m-%d %T UTC')"
echo ""
[ -n "$cfg_mirror" ] || { echo "Mirroring is disabled" >&2; exit 1; }
url="$(config_get baseurl)" || :
# Reject an empty URL or one containing a space or a tab.  The tab is
# generated here so the check does not depend on a literal tab character
# surviving in this file.
_url_tab="$(printf '\t')"
case "$url" in *" "*|*"$_url_tab"*|"")
    echo "Bad mirror URL (\"$url\")"
    exit 1
esac

cleanup_failed_clone

# Record original mirror type for use by update.sh
mirror_type="$(get_url_mirror_type "$url")"
git config girocco.mirrortype "$mirror_type"

echo "Mirroring from URL \"$url\""
echo ""

if [ "$cfg_project_owners" = "source" ]; then
    # Take the owner from the owner of the source directory.
    # NOTE(review): this used to read "config set owner", which is not a
    # command; config_set is the setter used for svnurl elsewhere in this
    # script.
    config_set owner "$(ls -ldH "${url#file://}" 2>/dev/null | awk '{print $3}')"
fi

# Recipients of the "clone completed" mail: project owner plus admin.
mailaddrs="$(config_get owner)" || :
[ -z "$cfg_admin" ] ||
if [ -z "$mailaddrs" ]; then mailaddrs="$cfg_admin"; else mailaddrs="$mailaddrs,$cfg_admin"; fi

# Make sure we don't get any unwanted loose objects
# Starting with Git v2.10.0 fast-import can generate loose objects unless we
# tweak its configuration to prevent that
git_add_config 'fetch.unpackLimit=1'
# Note the git config documentation is wrong
# transfer.unpackLimit, if set, overrides fetch.unpackLimit
git_add_config 'transfer.unpackLimit=1'
# But not the Git v2.10.0 and later fastimport.unpackLimit which improperly uses <= instead of <
git_add_config 'fastimport.unpackLimit=0'

# Initial mirror
echo "Initiating mirroring..."
headref=
showheadwarn=
warnempty=

# remember the starting time so we can easily combine fetched loose objects
# we sleep for 1 second after creating .needspack to make sure all objects are newer
if ! [ -e .needspack ]; then
    rm -f .needspack
    >.needspack
    sleep 1
fi
# ---- main script: fetch, dispatched on the URL scheme ---------------------
# Each arm performs the initial import for one kind of source.  The final
# arm handles native Git URLs and also picks the HEAD symref.
# NOTE: the here-document bodies and their EOT terminators below are kept at
# column zero on purpose so that they do not depend on tab-only indentation.
case "$url" in
    svn://* | svn+http://* | svn+https://* | svn+file://* | svn+ssh://*)
        [ -n "$cfg_mirror_svn" ] || { echo "Mirroring svn is disabled" >&2; exit 1; }
        # We just remove svn+ here, so svn+http://... becomes http://...
        # We also remove a trailing '/' to match what git-svn will do
        case "$url" in svn+ssh://*) svnurl="$url";; *) svnurl="${url#svn+}";; esac
        svnurl="${svnurl%/}"
        # Use an 'anonsvn' username as is commonly used for anonymous svn
        # Use an 'anonsvn' password as is commonly used for anonymous svn
        GIT_ASKPASS_PASSWORD=anonsvn
        export GIT_ASKPASS_PASSWORD
        # We require svn info to succeed on the URL otherwise it's
        # simply not a valid URL and without using -s on the init it
        # will not otherwise be tested until the fetch
        svn --non-interactive --username anonsvn --password anonsvn info "$svnurl" >/dev/null
        # We initially use -s for the init which will possibly shorten
        # the URL.  However, the shortening can fail if a password is
        # not required for the longer version but is for the shorter,
        # so try again without -s if the -s version fails.
        # We must use GIT_DIR=. here or ever so "helpful" git-svn will
        # create a .git subdirectory!
        GIT_DIR=. git svn init --username=anonsvn --prefix "" -s "$svnurl" </dev/null ||
        GIT_DIR=. git svn init --username=anonsvn --prefix "" "$svnurl" </dev/null
        # We need to remember this url so we can detect changes because
        # ever so "helpful" git-svn may shorten it!
        config_set svnurl "$svnurl"
        # At this point, since we asked for a standard layout (-s) git-svn
        # may have been "helpful" and adjusted our $svnurl to a prefix and
        # then glued the removed suffix onto the front of any svn-remote.svn.*
        # config items.  We could avoid this by not using the '-s' option
        # but then we might not get all the history.  If, for example, we
        # are cloning an http://svn.example.com/repos/public repository that
        # early in its history moved trunk => public/trunk we would miss that
        # earlier history without allowing the funky shorten+prefix behavior.
        # So we read back the svn-remote.svn.fetch configuration and compute
        # the prefix.  This way we are sure to get the correct prefix.
        gitsvnurl="$(git config --get svn-remote.svn.url)" || :
        gitsvnfetch="$(git config --get-all svn-remote.svn.fetch | tail -1)" || :
        gitsvnprefix="${gitsvnfetch%%:*}"
        gitsvnsuffix="${gitsvnprefix##*/}"
        gitsvnprefix="${gitsvnprefix%$gitsvnsuffix}"
        # Ask git-svn to store everything in the normal non-remote
        # locations being careful to use the correct prefix
        git config --replace-all svn-remote.svn.fetch "${gitsvnprefix}trunk:refs/heads/master"
        git config --replace-all svn-remote.svn.branches "${gitsvnprefix}branches/*:refs/heads/*"
        git config --replace-all svn-remote.svn.tags "${gitsvnprefix}tags/*:refs/tags/*"
        # look for additional non-standard directories to fetch
        # check for standard layout at the same time
        foundstd=
        foundfile=
        # The loop and the follow-up "if" share one brace group because the
        # right-hand side of a pipeline may run in a subshell; foundstd and
        # foundfile are only reliably visible inside that group.
        svn --non-interactive --username anonsvn --password anonsvn ls "$gitsvnurl/${gitsvnprefix}" 2>/dev/null |
        { while read -r file; do case $file in
            # skip the already-handled standard ones and any with a space or tab
            # ($tab is not set in this file; it is used the same way by the
            # ls-remote parsing in the last arm of this case)
            *' '*|*"$tab"*) :;;
            trunk/|branches/|tags/) foundstd=1;;
            # only fetch extra directories from the $svnurl root (not any files)
            *?/) git config --add svn-remote.svn.fetch \
                "${gitsvnprefix}${file%/}:refs/heads/${file%/}";;
            *?) foundfile=1;;
        esac; done
        # if files found and no standard directories present use a simpler layout
        if [ -z "$foundstd" ] && [ -n "$foundfile" ]; then
            git config --unset svn-remote.svn.branches
            git config --unset svn-remote.svn.tags
            git config --replace-all svn-remote.svn.fetch ':refs/heads/master'
        fi; }
        test $? -eq 0
        # Again, be careful to use GIT_DIR=. here or else new .git subdirectory!
        GIT_DIR=. git svn fetch --log-window-size="$var_log_window_size" --username=anonsvn --quiet </dev/null
        # git svn does not preserve group permissions in the svn subdirectory
        chmod -R ug+rw,o+r svn
        # git svn also leaves behind ref turds that end with @nnn
        # We get rid of them now
        git for-each-ref --format='%(refname)' |
        LC_ALL=C sed '/^..*@[1-9][0-9]*$/!d; s/^/delete /' |
        git_updateref_stdin
        unset GIT_ASKPASS_PASSWORD
        ;;
    darcs://*)
        [ -n "$cfg_mirror_darcs" ] || { echo "Mirroring darcs is disabled" >&2; exit 1; }
        httpurl="http://${url#darcs://}"
        git_darcs_fetch "$httpurl"
        ;;
    bzr://*)
        [ -n "$cfg_mirror_bzr" ] || { echo "Mirroring bzr is disabled" >&2; exit 1; }
        # we just remove bzr:// here, a typical bzr url is just
        # "lp:foo"
        bzrurl="${url#bzr://}"
        git_bzr_fetch "$bzrurl"
        ;;
    hg+http://* | hg+https://* | hg+file://* | hg+ssh://*)
        [ -n "$cfg_mirror_hg" ] || { echo "Mirroring hg is disabled" >&2; exit 1; }
        # We just remove hg+ here, so hg+http://... becomes http://...
        hgurl="${url#hg+}"
        # Perform the initial hg clone
        hg clone -U "$hgurl" "$(pwd)/repo.hg"
        # Do the fast-export | fast-import
        git_hg_fetch
        ;;
    *)
        # We manually add remote.origin.url and remote.origin.fetch
        # to simulate a `git remote add --mirror=fetch` since that's
        # not available until Git 1.7.5 and this way we guarantee we
        # always get exactly the intended configuration and nothing else.
        git config remote.origin.url "$url"
        if ! is_gfi_mirror_url "$url" && [ "$(git config --bool girocco.cleanmirror 2>/dev/null || :)" = "true" ]; then
            git config --replace-all remote.origin.fetch "+refs/heads/*:refs/heads/*"
            git config --add remote.origin.fetch "+refs/tags/*:refs/tags/*"
            git config --add remote.origin.fetch "+refs/notes/*:refs/notes/*"
            git config --add remote.origin.fetch "+refs/top-bases/*:refs/top-bases/*"
            git config --bool girocco.lastupdateclean true
        else
            git config --replace-all remote.origin.fetch "+refs/*:refs/*"
            git config --bool girocco.lastupdateclean false
        fi
        # Set the correct HEAD symref by using ls-remote first
        GIT_SSL_NO_VERIFY=1 GIT_TRACE_PACKET=1 git ls-remote origin >.refs-temp 2>.pkts-temp ||
        {
            # Since everything was redirected, on failure there'd be no output,
            # so let's make some failure output
            cat .pkts-temp
            echo ""
            echo "git ls-remote \"$url\" failed"
            exit 1
        }
        # Compensate for git() {} side effects
        unset GIT_TRACE_PACKET
        # If the server is running at least Git 1.8.4.3 then it will send us the actual
        # symref for HEAD.  If we are running at least Git 1.7.5 then we can snarf that
        # out of the packet trace data.
        if [ -s .refs-temp ]; then
            # Nothing to do unless the remote repository has at least 1 ref
            # See if we got a HEAD ref
            head="$(LC_ALL=C grep -E "^$octet20$hexdig*[ $tab]+HEAD\$" <.refs-temp | LC_ALL=C awk '{print $1}')"
            # If the remote has HEAD set to a symbolic ref that does not exist
            # then we will not receive a HEAD ref in the ls-remote output
            headref=
            showheadwarn=
            symrefcap=
            if [ -n "$head" ]; then
                symrefcap="$(LC_ALL=C sed -ne <.pkts-temp \
                "/packet:.*git<.*[ $tab]symref="'HEAD:refs\/heads\/'"[^ $tab]/\
                {s/^.*[ $tab]symref="'HEAD:\(refs\/heads\/'"[^ $tab][^ $tab]*"'\).*$/\1/;p;}')"
                # prefer $symrefcap (refs/heads/master if no $symrefcap) if it
                # matches HEAD otherwise take the first refs/heads/... match
                matchcnt=0
                while read ref; do
                    [ -n "$ref" ] || continue
                    matchcnt=$(( $matchcnt + 1 ))
                    if [ -z "$headref" ] || [ "$ref" = "${symrefcap:-refs/heads/master}" ]; then
                        headref="$ref"
                    fi
                    if [ "$headref" = "${symrefcap:-refs/heads/master}" ] && [ $matchcnt -gt 1 ]; then
                        break
                    fi
                done <<-EOT
$(LC_ALL=C grep -E "^$head[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp |
    LC_ALL=C awk '{print $2}')
EOT
                # Warn if there was more than one match and $symrefcap is empty
                # or $symrefcap is not the same as $headref since our choice might
                # differ from the source repository's HEAD
                if [ $matchcnt -ge 1 ] && [ "$symrefcap" != "$headref" ] &&
                   { [ -n "$symrefcap" ] || [ $matchcnt -gt 1 ]; }; then
                    showheadwarn=1
                fi
            fi
            if [ -z "$headref" ]; then
                # If we still don't have a HEAD ref then prefer refs/heads/master
                # if it exists otherwise take the first refs/heads/...
                # We do not support having a detached HEAD.
                # We always warn now because we will be setting HEAD differently
                # than the source repository had HEAD set
                showheadwarn=1
                while read ref; do
                    [ -n "$ref" ] || continue
                    if [ -z "$headref" ] || [ "$ref" = "refs/heads/master" ]; then
                        headref="$ref"
                    fi
                    [ "$headref" != "refs/heads/master" ] || break
                done <<-EOT
$(LC_ALL=C grep -E "^$octet20$hexdig*[ $tab]+refs/heads/[^ $tab]+\$" <.refs-temp |
    LC_ALL=C awk '{print $2}')
EOT
            fi
            # If we STILL do not have a HEAD ref (perhaps the source repository
            # contains only tags) then use refs/heads/master.  It will be invalid
            # but is no worse than we used to do by default and we'll warn about
            # it.  We do not support a HEAD symref to anything other than refs/heads/...
            [ -n "$headref" ] || headref="refs/heads/master"
            git symbolic-ref HEAD "$headref"
            pruneopt=--prune
            [ "$(git config --bool fetch.prune 2>/dev/null || :)" != "false" ] || pruneopt=
            # remember the starting time so we can easily detect new packs for fast-import mirrors
            # we sleep for 1 second after creating .gfipack to make sure all packs are newer
            if is_gfi_mirror_url "$url" && ! [ -e .gfipack ]; then
                rm -f .gfipack
                >.gfipack
                sleep 1
            fi
            # $pruneopt is intentionally unquoted so that it vanishes when empty
            GIT_SSL_NO_VERIFY=1 git remote update $pruneopt
            if [ -e .gfipack ] && is_gfi_mirror_url "$url"; then
                find -L objects/pack -type f -newer .gfipack -name "pack-$octet20*.pack" -print >>gfi-packs
                rm -f .gfipack
            fi
        else
            warnempty=1
            git symbolic-ref HEAD "refs/heads/master"
        fi
        rm -f .refs-temp .pkts-temp
        ;;
esac
# ---- main script: finalization --------------------------------------------

# The objects subdirectories permissions must be updated now.
# In the case of a dumb http clone, the permissions will not be correct
# (missing group write) despite the core.sharedrepository=1 setting!
# The objects themselves seem to have the correct permissions.
# This problem appears to have been fixed in the most recent git versions.
perms=g+w
[ "$cfg_permission_control" != "Hooks" ] || perms=go+w
# Let find hand the directories to chmod directly so that directory names
# are never subject to word splitting or globbing.  Still best-effort.
find -L objects -maxdepth 1 -type d -exec chmod "$perms" '{}' + 2>/dev/null || :

# We may have just cloned a lot of refs and they will all be
# individual files at this point.  Let's pack them now so we
# can have better performance right from the start.
git pack-refs --all

# Initialize gitweb.lastreceive, gitweb.lastchange and info/lastactivity
# (one timestamp is taken so both settings are guaranteed to agree)
_clone_stamp="$(date '+%a, %d %b %Y %T %z')"
git config gitweb.lastreceive "$_clone_stamp"
git config gitweb.lastchange "$_clone_stamp"
git for-each-ref --sort=-committerdate --format='%(committerdate:iso8601)' \
    --count=1 refs/heads >info/lastactivity || :
! [ -d htmlcache ] || { >htmlcache/changed; } 2>/dev/null || :

# Don't leave a multi-megabyte useless FETCH_HEAD behind
rm -f FETCH_HEAD

# Last ditch attempt to get a valid HEAD for a non-git source
check_and_set_head || :

# The rest
echo "Final touches..."
git update-server-info
# From here on the clone counts as successful: disarm the failure trap.
trap "" EXIT

# run gc now unless the clone is empty
if [ -z "$warnempty" ]; then
    git config --unset gitweb.lastgc 2>/dev/null || :
    rm -f .delaygc .allowgc
fi

# Optional paragraphs for the completion mail; each is either empty or a
# block of text that begins and ends with a newline.
emptynote=
[ -z "$warnempty" ] ||
emptynote="
WARNING: You have mirrored an empty repository.
"
headnote=
[ -z "$showheadwarn" ] || [ -z "$headref" ] ||
headnote="
NOTE: HEAD has been set to a symbolic ref to \"$headref\".
Use the \"Project settings\" link to choose a different HEAD symref.
"
sizenote=
! is_gfi_mirror ||
sizenote="
NOTE: Since this is a mirror of a non-Git source, the initial repository
size may be somewhat larger than necessary. This will be corrected
shortly. If you intend to clone this repository you may want to
wait up to 1 hour before doing so in order to receive the more
compact final size.
"
[ -z "$mailaddrs" ] ||
mailref "clone@$cfg_gitweburl/$proj.git" -s "[$cfg_name] $proj clone completed" "$mailaddrs" <<EOT || :
Congratulations! The clone of project $proj just completed.

* Source URL: $url
* GitWeb interface: $cfg_gitweburl/$proj.git
* Project settings: $cfg_webadmurl/editproj.cgi?name=$(echo "$proj" | LC_ALL=C sed -e 's/[+]/%2B/g')
$emptynote$headnote$sizenote
Have a lot of fun.
EOT

echo "Mirroring finished successfuly!"
# In case this is a re-mirror, lastgc could have been set already so clear it now
git config --unset gitweb.lastgc || :
rm .clone_in_progress
echo "$sizenote@OVER@"