Couple of minor fixes.
[svn-merge2git.git] / svn-merge2git.sh
blob49b0d782c649d115a3cd3403d25fa45ec2c8ac8c
#!/bin/sh
# Copyright (c) 2008 Benoit Sigoure <tsuna@lrde.epita.fr>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# Option specification consumed by git-sh-setup (fed to
# `git rev-parse --parseopt').  Everything before the `--' line is the usage
# text; each line after it declares one option.
OPTIONS_SPEC="\
svn-merge2git [options] [refspec]
--
a,all Do the work on all the local branches
d,debug Turn on debug mode (useful if you're hacking the script)
H,no-rewrite Do not rewrite the entire history (leave the grafts)
m,merge= Arg of the form: refspec:rev:branch. Marks refspec as a merge
n,dry-run Do the entire processing without actually changing anything
p,prune Clean up everything after rewriting the history (not undo-able!)
P,no-repack Do not repack the resulting repository (implied by -H)
r,remote Include remote branches (create a local branch for each of them)
v,verbose Be more verbose
x,exclude= refspec to exclude from the potential merge commits
"
SUBDIRECTORY_OK=Yes
# Source git-sh-setup from git's exec path so that this also works when the
# script is run directly rather than through the `git' wrapper.
. "$(git --exec-path)/git-sh-setup"
cd_to_toplevel

# Directory in which the temporary files are created.
: "${TMPDIR=/tmp}"
export TMPDIR
# BRE (Basic RegExp) compatible with `git rev-list --grep' and `sed'.  The RE
# *must* capture the revision merged in its first group.
merge_pattern='[Mm]erge.*[0-9][0-9]*:\([0-9][0-9]*\)'

# BRE used to exclude a commit when the line of its log that matches
# $merge_pattern also matches this pattern.
exclude_pattern='Finish'

# BRE used to exclude a potential merge commit when any line of its commit
# log matches this pattern.
log_exclude_pattern='Finish.*merge'
# extract_svn_branch_name <string>
# --------------------------------
# Find in <string> the name of a SVN branch.  Put the result in
# $svn_branch_name (empty when no known form matches).  Assumes SVN
# "stdlayout".  Calls `fatal' when <string> is empty.
extract_svn_branch_name()
{
  case $1 in #(
    '')
      fatal 'extract_svn_branch_name called with empty argument';; #(
    */branches/*)
      extract_svn_branch_name_ 'branches' "$1";; #(
    */tags/*)
      extract_svn_branch_name_ 'tags' "$1"
      warn "found a merge from tag '$svn_branch_name'";; #(
    */trunk*)
      svn_branch_name='trunk';; #(
    *'from the branch "'*'"'*)
      # Keep what is between the double quotes following `from the branch'.
      sed_tmp='s/^.*from the branch "//;s/".*$//'
      svn_branch_name="branches/"$(printf '%s\n' "$1" | sed "$sed_tmp");; #(
    *[Mm]'erge'*[0-9]:[0-9]*' with '*)
      # Keep what follows the last `with ', minus a trailing dot.
      sed_tmp='s/.*with //;s/\.$//'
      svn_branch_name="branches/"$(printf '%s\n' "$1" | sed "$sed_tmp");; #(
    *[Mm]'erge'*[0-9]:[0-9]*' from '*)
      # Keep what follows the last `from ', minus a trailing dot.
      sed_tmp='s/.*from //;s/\.$//'
      svn_branch_name="branches/"$(printf '%s\n' "$1" | sed "$sed_tmp");; #(
    *)
      svn_branch_name=;;
  esac
}
# extract_svn_branch_name_ <kind> <string>
# ----------------------------------------
# Helper of extract_svn_branch_name to factor some code.
# <kind> is probably either 'branches' or 'tags' (for SVN "stdlayout").
# Put the result (the part of <string> starting at `<kind>/') in
# $svn_branch_name.
extract_svn_branch_name_()
{
  # XXX: Assumes that a branch name does not contain any whitespace.  Fragile.
  # The second substitution strips trailing punctuation.
  sed_tmp="s|.*/\\($1/[^ ]*\\).*|\\1|;s/[^-a-zA-Z0-9_]*\$//"
  svn_branch_name=$(printf '%s\n' "$2" | sed "$sed_tmp")
}
# Name under which the script was invoked; used as a prefix in diagnostics.
me=$(basename "$0")
# fatal <msg>
# -----------
# Hand "<me>: error: <msg>" over to `die' (which is not defined in this file;
# presumably provided by git-sh-setup).
fatal()
{
  die "$me: error: $*"
}
# Number of warnings issued so far, and their accumulated text.
warnings=0
warn_msgs=
# warn <msg>
# ----------
# Print <msg> on stderr, count it in $warnings and append it (with a
# timestamp) to $warn_msgs.
warn()
{
  printf '%s\n' "$me: warning: $*" >&2
  warnings=$(($warnings + 1))
  warn_msgs="$warn_msgs
($(date)) warning: $*"
}
# verb <msg>
# ----------
# Print <msg> when verbose mode is enabled ($opt_verbose is the command `:').
# Always returns 0, whether or not something was printed.
verb()
{
  if $opt_verbose; then
    printf '%s\n' "$*"
  fi
}
# debug <msg>
# -----------
# Print <msg> when debug mode is enabled ($opt_debug is the command `:').
# Always returns 0, whether or not something was printed.
debug()
{
  if $opt_debug; then
    printf '%s\n' "$*"
  fi
}
# find_merge_parent <ref> <merge-line>
# ------------------------------------
# Return (in $merge_parent) the sha1 of the commit that has been merged in by
# <ref>.  <merge-line> must be a line extracted from the commit message of
# <ref> and will be used to extract the SVN revision merged.  For instance, if
# <ref> is a SVN merge of merge-line='Merge -r42:51 in branch foo', this
# function puts the sha1 of the first commit the revision of which is <= 51
# which happens to be in branch foo in $merge_parent.
# If the name of the branch being merged couldn't be found, $merge_parent
# contains 'unknown'.
# When $user_defined_merge is true, the revision and the branch are taken from
# the entry of $opt_merge for <ref> instead of <merge-line>.
find_merge_parent()
{
  if $user_defined_merge; then
    # Pick <rev> out of the `@<ref>:<rev>:<branch>@' entry of $opt_merge.
    sed_tmp="s/^.*@$1:\\([1-9][0-9]*\\):[^@]*@.*\$/\\1/"
    svn_merge_to=$(printf '%s\n' "$opt_merge" | sed "$sed_tmp")
  else
    # Replace <merge-line> by the revision captured by $merge_pattern.
    sed_tmp="s/.*$merge_pattern.*/\\1/"
    svn_merge_to=$(printf '%s\n' "$2" | sed "$sed_tmp")
  fi
  case $svn_merge_to in #(
    '' | 0* | *[^0-9]*)
      fatal "invalid SVN revision '$svn_merge_to' found in $1";;
  esac
  # Now $svn_merge_to is not necessarily a commit that took part of the
  # merge.  For instance, you can merge -r42:51 https://.../branches/foo
  # even if the last commit in branch foo is at r46.  So it's utterly
  # important that we find the last commit on the branch being merged the
  # revision of which must be <= $svn_merge_to (which is 51 in this example).
  if $user_defined_merge; then
    # Pick <branch> out of the `@<ref>:<rev>:<branch>@' entry of $opt_merge.
    sed_tmp="s/^.*@$1:[^:@]*:\\([^@]*\\)@.*\$/\\1/"
    svn_branch_name=$(printf '%s\n' "$opt_merge" | sed "$sed_tmp")
    test -n "$svn_branch_name" || fatal "internal error in find_merge_parent"
  else
    extract_svn_branch_name "$2"
    if test -z "$svn_branch_name"; then
      merge_parent='unknown'
      return 0
    fi
  fi

  # Create a range to intelligently limit the match of rev-list.  This will
  # produce a RE that rules out all the impossible revision numbers (that is,
  # the revisions >TO).  e.g:
  #   7    -> ([0-7])
  #   42   -> (4[0-2]|[0-3][0-9]|[1-9])
  #   123  -> (12[0-3]|1[0-1][0-9]|0[0-9][0-9]|[1-9][0-9]{0,1})
  #   6951 -> (695[0-1]|69[0-4][0-9]|6[0-8][0-9][0-9]|[0-5][0-9][0-9][0-9]|[1-9][0-9]{0,2})
  perl_tmp='$_ = "'"$svn_merge_to"'";
  my $l = length($_);
  my @r;
  foreach my $i (0 .. $l - 1) {
    /^(\d*)(\d)(\d{$i})$/;
    my ($a, $b, $c) = ($1, int($2), $3);
    if ($i != 0) {
      # Avoid pitfalls e.g. 10[0-9] or 0[0-9][0-9] for 101
      next if $b == 0 or ($b == 1 and $a eq "");
      --$b;
    }
    $b = "[0-$b]" if $b;
    $c =~ s/./[0-9]/g;
    push(@r, "$a$b$c");
  }
  push(@r, "[1-9]" . ($l - 2 ? "[0-9]{0," . ($l - 2) . "}" : ""))
    if $l > 1;
  print "(" . join("|", @r) . ")";'
  rev_range=$(perl -we "$perl_tmp")
  # Check perl's status right away: any later command would clobber $?.
  rv=$?
  test $rv -eq 0 || fatal "perl returned $rv"

  # Extract the SVN revision out of the git-svn-id line of the first commit
  # listed by rev-list whose git-svn-id matches the branch and the range.
  sed_tmp='s/^ *git-svn-id: .*@\([0-9]*\) [-0-9a-f]*$/\1/p'
  svn_merge_parent=$(git rev-list --all -1 --header -E \
    --grep="^ *git-svn-id: .*/$svn_branch_name@$rev_range [-0-9a-f]*\$" \
    | sed -n "$sed_tmp")
  case $svn_merge_parent in #(
    '' | 0* | *[^0-9]*) fatal "invalid svn_merge_parent: '$svn_merge_parent'";;
  esac
  if $opt_verbose; then
    if test "$svn_merge_to" -eq "$svn_merge_parent"; then
      verb_tmp=
    else
      verb_tmp=" (in fact r$svn_merge_parent)"
    fi
  fi
  verb " $1 is merging SVN r$svn_merge_to$verb_tmp from SVN $svn_branch_name"
  # Now find the sha1 of the merge parent.
  merge_parent=$(git rev-list --all \
    --grep="^ *git-svn-id: .*@$svn_merge_parent [-0-9a-f]*\$")
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
}
# create_graft <ref> <merge-parent>
# ---------------------------------
# Add <merge-parent> as 2nd parent of the commit designated by <ref> by
# appending a line to $graft_file.  Does nothing when <merge-parent> already
# is a parent of <ref>, or when the very same graft is already recorded (in
# which case $nalready is incremented).  Increments $nconverted when a graft
# is actually written.  In dry-run mode nothing is written nor counted.
create_graft()
{
  # --parents will print $1 along with its current parents.
  grafted_commit=$(git rev-list --no-walk --parents "$1")
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
  graft_merge_parent=$2

  case $grafted_commit in #(
    *"$graft_merge_parent"*)
      debug " not grafting commit $1: $graft_merge_parent is already a parent ($grafted_commit)"
      return 0;;
  esac

  graft="$grafted_commit $graft_merge_parent"
  # Succeeds only when $graft_file exists and has a line starting with <ref>.
  if existing_graft=$(test -f "$graft_file" && grep "^$1" "$graft_file"); then
    if test x"$existing_graft" != x"$graft"; then
      fatal "$1 is already grafted ($existing_graft)\
 and the graft is different than what I was going to graft ($graft)"
    fi
    nalready=$(($nalready + 1))
    debug " not grafting commit $1: already properly grafted"
    return 0
  fi
  debug " grafting commit $1: add parent $graft_merge_parent"
  if $opt_dryrun; then
    return 0
  fi
  nconverted=$(($nconverted + 1))
  printf '%s\n' "$graft" >>"$graft_file" \
    || fatal "Failed to add a graft in $graft_file"
}
# rm_original_refs
# ----------------
# Remove all the refs under refs/original: drop the matching lines from
# $GIT_DIR/packed-refs (when that file exists) and delete the directory
# $GIT_DIR/refs/original.  A failure to edit packed-refs only yields a warning.
rm_original_refs()
{
  if test -f "$GIT_DIR/packed-refs"; then
    # `sed -i' is not POSIX; filter into a temporary file and move it back.
    packed_refs_tmp="$GIT_DIR/packed-refs.tmp$$"
    if sed '/refs\/original\//d' "$GIT_DIR/packed-refs" >"$packed_refs_tmp" \
       && mv -f "$packed_refs_tmp" "$GIT_DIR/packed-refs"; then
      :
    else
      rv=$?
      rm -f "$packed_refs_tmp"
      warn "Failed to edit '$GIT_DIR/packed-refs' (sed returned $rv)"
    fi
  fi
  # ${GIT_DIR:?} aborts rather than running `rm -rf /refs/original' should
  # GIT_DIR ever be empty.
  rm -rf "${GIT_DIR:?}/refs/original"
}
# rewrite_history
# ---------------
# Make *all* the grafts part of the actual history (with git filter-branch),
# unless history rewriting is disabled, in which case only print the command
# to use.  When the merge parent of some commits could not be found
# ($parent_unknown is not empty), list them and ask for a confirmation before
# rewriting.  Returns 1 when the answer could not be read.
rewrite_history()
{
  filter_branch='git filter-branch --parent-filter cat -- --all'
  if test -n "$parent_unknown"; then
    # One commit per line, sorted and uniq'ed, then joined back on one line.
    parent_unknown=$(printf '%s\n' "$parent_unknown" \
      | tr ' ' '\n' | sort -u | xargs)
    echo "I could not find the merge parent or merged branch in the following:
$parent_unknown
If you know which revision they are merging from which branch, you can invoke
me again with --merge <ref>:<rev>:<branch> and I will do the magic for you."
    if $opt_rewrite; then
      # printf rather than the unportable `echo -n'.
      printf '%s' "Do you want to go ahead and rewrite the entire history\
 anyway? [y/N] "
      read -r answer || {
        warn "failed to read your answer... I'm not rewriting anything."
        return 1
      }
      case $answer in #(
        [yY]*)
          echo "Alright so I'll rewrite the history now.
Bear in mind that all the refs I printed (especially the ones above of which I
couldn't find the parents) will be (most likely) changed so you'll have to
figure out by yourself what they are to use them with --merge"
          # Leave 5 seconds to change one's mind.
          printf '%s' 'Rewriting history in'
          for i in 5 4 3 2 1; do
            printf '%s' " ... $i"
            sleep 1
          done
          echo ' ... 0'
          ;; #(
        *)
          echo "OK. Use '$filter_branch' when you're ready or invoke me again."
          return 0;;
      esac
    fi
  fi

  if $opt_rewrite; then
    # Refresh all the timestamps.  I don't know why, they always change with
    # me (only the timestamps!) and git filter-branch will complain because
    # git diff-files will return differences (due to the timestamp change).
    # FIXME: Investigate why this script seems to touch the entire WC.
    # The weird thing is that it works even with a read-only WC.
    git status >/dev/null
    if $opt_dryrun; then
      return 0
    fi
    if test -f "$graft_file"; then
      # Intentionally unquoted: $filter_branch holds a command and its args.
      $filter_branch || fatal "git filter-branch returned $?"
      rm "$graft_file" || warn "Failed to rm $graft_file"
    else
      warn "No history rewriting necessary"
    fi
    # FIXME: Is it really necessary to repack if we didn't go through the
    # previous `if'?
    if $opt_repack; then
      if $opt_prune; then
        prune='--prune'
        rm_original_refs
      else
        prune=
      fi
      git gc $prune || warn "git gc $prune returned $?"
    fi
  else
    echo "$me: use '$filter_branch' to rewrite the entire history"
  fi
}
# doit <REF>
# ----------
# Find all the merges mentioned in the commit messages reachable from <REF>
# and make them become real Git merges (by creating grafts).
# Commits that are skipped are stored in $skipped; commits whose merge parent
# could not be found are stored in $parent_unknown.  $nmerge is incremented
# for each commit considered to be a merge.  Commits already listed in
# $tmp_done (seen through a previous <REF>) are not processed again.
doit()
{
  refspec=$1
  verb " >> Processing merges in the history of $refspec"

  git rev-list --no-walk "$refspec" >/dev/null \
    || fatal "'$refspec' does not seem to be a valid refspec"

  git rev-list --grep="$merge_pattern" "$refspec" >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git rev-list failed and returned $rv"
  while read -r commit; do
    # Check that we didn't already process this commit.
    grep -F "$commit" "$tmp_done" >/dev/null && continue
    echo "$commit" >>"$tmp_done" || fatal "could not write to $tmp_done"

    # $opt_exclude is a list of space-prefixed items: pad it so that the last
    # item is also followed by a space, and really skip the commit.
    case " $opt_exclude " in #(
      *" $commit "*)
        skipped="$skipped $commit"
        verb " skipping $commit because it's listed in --exclude"
        continue;;
    esac

    merge_log=$(git log --no-walk "$commit")
    rv=$?
    test $rv -eq 0 || fatal "git log returned $rv"
    # Keep only the first line of the log that matches $merge_pattern.
    merge_line=$(printf '%s\n' "$merge_log" | sed "/$merge_pattern/!d;//q")

    # Did the user describe this merge with --merge?
    case $opt_merge in #(
      *"@$commit:"*':'*'@'*)
        user_defined_merge=:;; #(
      *)
        user_defined_merge=false;;
    esac

    # Maybe skip the commit if it matches $exclude_pattern or
    # $log_exclude_pattern (in which case it's not a merge)
    if $user_defined_merge; then
      : # Do not consider this commit for pattern-based exclusion.
    else
      if $has_exclude \
         && printf '%s\n' "$merge_line" \
            | grep -- "$exclude_pattern" >/dev/null; then
        skipped="$skipped $commit"
        verb " skipping $commit whose log merge-line is: $merge_line"
        continue
      fi
      if $has_log_exclude \
         && printf '%s\n' "$merge_log" \
            | grep -- "$log_exclude_pattern" >/dev/null; then
        skipped="$skipped $commit"
        verb " skipping $commit whose log is:"
        $opt_verbose && printf '%s\n' "$merge_log" | sed 's/^/ | /'
        continue
      fi
    fi

    nmerge=$(($nmerge + 1))
    verb " $commit is a merge commit, log says:
 | $merge_line"

    find_merge_parent "$commit" "$merge_line"

    case $merge_parent in #(
      unknown)
        warn "could not find the merge parent of $commit"
        parent_unknown="$parent_unknown $commit"
        continue;; #(
      '' | *[^0-9a-f]*)
        fatal "invalid merge_parent: '$merge_parent'";;
    esac

    create_graft "$commit" "$merge_parent"
    test $? -eq 0 || fatal "failed to create a graft for commit $commit"
  done <"$tmp_buf"
}
# ------------------ #
# `main' starts here #
# ------------------ #

test -d "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not a directory"
test -w "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not writable"
tmp_buf=
tmp_done=
# Clean up temp files upon exit, preserving the exit status.  The trap is set
# before the files are created so that nothing is left behind should the
# second mktemp fail; the paths are expanded when the trap runs.
trap 'exit_status=$?; rm -f "$tmp_buf" "$tmp_done"; exit $exit_status' 0
# Scratch buffer for the output of git commands.
tmp_buf=$(mktemp "$TMPDIR/$me.XXXXXX") \
  || fatal "could not create a temporary file in '$TMPDIR'"
# List of the commits already processed.
tmp_done=$(mktemp "$TMPDIR/$me.done.XXXXXX") \
  || fatal "could not create a temporary file in '$TMPDIR'"
# Parse the options passed to the script.
# Initialize the defaults.  Boolean options hold the name of a command to
# run: `:' for true, `false' for false.
opt_all=false
opt_debug=false
opt_dryrun=false
# List of the --exclude arguments, each one preceded by a space.
opt_exclude=
# '@'-separated list of ':'-separated triplets (refspec:rev:branch)
opt_merge=
opt_prune=false
opt_remote=false
opt_repack=:
opt_rewrite=:
opt_verbose=false
# -------- #
# `getopt' #
# -------- #

# parse_merge_arg_ <refspec:rev:branch>
# -------------------------------------
# Validate one --merge argument and append it, enclosed in '@', to
# $opt_merge.  Calls `fatal' when the refspec is unknown to git, when the
# revision is not a positive integer without leading zero, or when the branch
# name is empty or contains a space or an '@'.
parse_merge_arg_()
{
  # Everything before the first ':'.
  refspec=${1%%:*}
  # The substitution must be quoted: `test -n' without operand is true.
  test -n "$(git rev-list --no-walk "$refspec")" \
    || fatal "invalid refspec '$refspec' in --merge argument"
  # What lies between the first and the last ':'.
  svn_merge_to=${1%:*}
  svn_merge_to=${svn_merge_to#*:}
  case $svn_merge_to in #(
    '' | 0* | *[^0-9]*)
      fatal "invalid SVN revision '$svn_merge_to' in --merge argument";;
  esac
  # Everything after the second ':'.
  svn_branch_name=${1#*:}
  svn_branch_name=${svn_branch_name#*:}
  case $svn_branch_name in #(
    '')
      fatal "empty SVN branch name in --merge argument";; #(
    *' '*)
      fatal "whitespace unsupported in SVN branch name in --merge argument";; #(
    *'@'*)
      fatal "at (@) unsupported in SVN branch name in --merge argument";;
  esac
  opt_merge="$opt_merge@$1@"
}

while test $# != 0
do
  case $1 in #(
    -a | --all)
      opt_all=:;; #(
    -d | --debug)
      opt_debug=:;; #(
    -H | --no-rewrite)
      opt_rewrite=false;; #(
    -m | --merge)
      shift
      parse_merge_arg_ "$1"
      ;; #(
    -n | --dry-run)
      opt_dryrun=:;; #(
    -p | --prune)
      opt_prune=:;; #(
    -P | --no-repack)
      opt_repack=false;; #(
    -r | --remote)
      opt_remote=:;; #(
    -v | --verbose)
      opt_verbose=:;; #(
    -x | --exclude)
      shift; opt_exclude="$opt_exclude $1";; #(
    --)
      # End of the options: what remains are the refspecs to process.
      shift; break;; #(
    *)
      usage;; #(
  esac
  shift
done
# We use rev-list --all a lot.  When we finish, git filter-branch saves all
# the original refs under refs/original.  These will be selected by rev-list
# --all which is something we want to avoid.  So we bail out when
# refs/original exists, unless --prune was given, in which case we remove
# these refs ourselves.
if test -n "$(git for-each-ref refs/original)"; then
  if $opt_prune; then
    rm_original_refs
  else
    fatal "There are some refs under refs/original which could be
the refs saved by a previous run of myself.  This can also occur if you used
git filter-branch (which I personally do).  Please get rid of them if you want
to re-run me or re-run me with the --prune option and I'll do it for you."
  fi
fi
# --all: append every ref found under refs/heads to the positional
# parameters (the list of refspecs to process).
if $opt_all; then
  git for-each-ref --shell --format='ref=%(refname)' refs/heads >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
  while read -r line
  do
    # $line is an assignment to $ref, quoted for the shell by git (--shell).
    eval "$line"
    set -- "$@" "$ref"
  done <"$tmp_buf"
fi
# --remote: for every ref found under refs/remotes, make sure there is a
# local branch of the same (base) name and append it to the positional
# parameters (the list of refspecs to process).
if $opt_remote; then
  git for-each-ref --shell --format='ref=%(refname)' refs/remotes >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
  while read -r line
  do
    # $line is an assignment to $ref, quoted for the shell by git (--shell).
    eval "$line"
    branch=$(basename "$ref")
    case $branch in #(
      HEAD)        # Skip branches named `HEAD' (which does happen)
        continue;; # because they create ambiguities.
    esac
    # if the local $branch does not already exist, we create one
    exists=$(git rev-list --no-walk refs/heads/"$branch" 2>/dev/null)
    if test -z "$exists"; then # the $branch does not locally exist
      verb "creating branch '$branch' from '$ref'"
      git branch "$branch" "$ref" \
        || fatal "could not create branch '$branch' from '$ref'"
      set -- "$@" "refs/heads/$branch"
    else # there already is a local $branch
      sha1=$(git rev-list --no-walk "$ref")
      # Maybe the existing local $branch is identical to the remote $ref?
      if test "$sha1" = "$exists"; then # OK, local = remote
        verb "branch '$branch' is already properly initialized to '$ref'"
        set -- "$@" "refs/heads/$branch"
      else # KO, local != remote
        warn "there already exists a local branch '$branch'
and it is at $exists whereas the remote branch '$ref'
is at $sha1 so I'm skipping it..."
      fi
    fi
  done <"$tmp_buf"
fi
# No refspec given => work on HEAD
if test -z "$*"; then
  set -- HEAD
fi
# File in which the grafts are recorded.
graft_file="$GIT_DIR/info/grafts"

# Pattern-based exclusion is enabled only for the patterns that are not
# empty.  `:' stands for true, `false' for false.
if test -z "$exclude_pattern"; then
  has_exclude=false
else
  has_exclude=:
fi

if test -z "$log_exclude_pattern"; then
  has_log_exclude=false
else
  has_log_exclude=:
fi
totalmerge=0      # number of merges seen over all the refspecs
totalconverted=0  # number of them we actually grafted
nbranch=0         # number of refspecs processed
skipped=          # space separated list of commit that we skipped
parent_unknown=   # space separated list of commit of which we couldn't figure
                  # out the merge parent
# Process each refspec given on the command line (or added by --all/--remote).
for refspec
do
  nmerge=0     # number of merges seen for $refspec
  nconverted=0 # number of them we actually grafted
  nalready=0   # number of them that were already grafted/imported
  doit "$refspec"

  if test "$nalready" -eq "0"; then
    alrmsg=
  else
    alrmsg=" ($nalready already converted)"
  fi
  echo ">>> processed $nconverted/$nmerge merges$alrmsg in $refspec"

  totalmerge=$(($totalmerge + $nmerge))
  totalconverted=$(($totalconverted + $nconverted))
  nbranch=$(($nbranch + 1))
done
rewrite_history

# Recap all the warnings issued during the run.
if test "$warnings" -ne 0; then
  warn "job completed with $warnings warnings:$warn_msgs"
fi
# Some merges were not converted: list the commits that were skipped.
if test "$totalmerge" -ne "$totalconverted"; then
  # One commit per line, sorted and uniq'ed, then joined back on one line.
  skipped=$(printf '%s\n' "$skipped" | tr ' ' '\n' | sort -u | xargs)
  echo "The following commits have been skipped: $skipped"
fi
# FIXME: Print a warning for each unused --merge argument.
echo "Done. Processed $totalconverted/$totalmerge merges in $nbranch branches"