Handle user-specified merges and exclusions.
[svn-merge2git.git] / svn-merge2git.sh
blob15c5330398da857228bc5d8313d9725f7577b8d8
#!/bin/sh
# Copyright (c) 2008 Benoit Sigoure <tsuna@lrde.epita.fr>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# Option specification consumed by git-sh-setup (through `git rev-parse
# --parseopt'): usage line, a `--' separator, then one line per option.
OPTIONS_SPEC="\
svn-merge2git [options] [refspec]
--
a,all         Do the work on all the local branches
d,debug       Turn on debug mode (useful if you're hacking the script)
H,no-rewrite  Do not rewrite the entire history (leave the grafts)
m,merge=      Arg of the form: refspec:rev:branch.  Marks refspec as a merge
n,dry-run     Do the entire processing without actually changing anything
p,prune       Clean up everything after rewriting the history (not undo-able!)
P,no-repack   Do not repack the resulting repository (implied by -H)
r,remote      Include remote branches (create a local branch for each of them)
v,verbose     Be more verbose
x,exclude=    refspec to exclude from the potential merge commits
"
SUBDIRECTORY_OK=Yes
# git-sh-setup lives in git's exec-path, which is not on PATH when this
# script is invoked directly, so locate it explicitly.
. "$(git --exec-path)/git-sh-setup"
cd_to_toplevel

# Default TMPDIR to /tmp when it is unset (an empty value is left alone).
: "${TMPDIR=/tmp}"
export TMPDIR
# BRE (Basic RegExp) compatible with `git rev-list --grep' and `sed'.  The RE
# *must* capture the revision merged in its first group.
merge_pattern='[Mm]erge.*[0-9][0-9]*:\([0-9][0-9]*\)'

# BRE used to exclude a commit: when the log line that matches
# $merge_pattern also matches this pattern, the commit is skipped.
exclude_pattern='Finish'

# BRE which is used to exclude matches in the commit log of potential merge
# commits.
log_exclude_pattern='Finish.*merge'
# extract_svn_branch_name <string>
# --------------------------------
# Find in <string> the name of an SVN branch.  Put the result in
# $svn_branch_name (empty when nothing could be recognized).  Assumes SVN
# "stdlayout".  The arms are tried in order, so a URL-looking path wins over
# the free-form "from the branch"/"with"/"from" wordings.
extract_svn_branch_name()
{
  case $1 in
    '')
      fatal 'extract_svn_branch_name called with empty argument';;
    */branches/*)
      extract_svn_branch_name_ 'branches' "$1";;
    */tags/*)
      extract_svn_branch_name_ 'tags' "$1"
      warn "found a merge from tag '$svn_branch_name'";;
    */trunk*)
      svn_branch_name='trunk';;
    *'from the branch "'*'"'*)
      # Keep what sits between the double quotes.
      sed_tmp='s/^.*from the branch "//;s/".*$//'
      svn_branch_name="branches/"$(printf '%s\n' "$1" | sed "$sed_tmp");;
    *[Mm]'erge'*[0-9]:[0-9]*' with '*)
      # Keep what follows the last "with ", minus a trailing period.
      sed_tmp='s/.*with //;s/\.$//'
      svn_branch_name="branches/"$(printf '%s\n' "$1" | sed "$sed_tmp");;
    *[Mm]'erge'*[0-9]:[0-9]*' from '*)
      # Keep what follows the last "from ", minus a trailing period.
      sed_tmp='s/.*from //;s/\.$//'
      svn_branch_name="branches/"$(printf '%s\n' "$1" | sed "$sed_tmp");;
    *)
      svn_branch_name=;;
  esac
}
# extract_svn_branch_name_ <kind> <string>
# ----------------------------------------
# Helper of extract_svn_branch_name, to factor some code.
# <kind> is probably either 'branches' or 'tags' (for SVN "stdlayout").
# Put in $svn_branch_name the last "<kind>/<name>" path component found in
# <string>, where <name> runs up to the next space.
extract_svn_branch_name_()
{
  # XXX: Assumes that a branch name does not contain whitespace.  Fragile.
  sed_tmp="s|.*/\\($1/[^ ]*\\).*|\\1|"
  svn_branch_name=$(printf '%s\n' "$2" | sed "$sed_tmp")
}
# Name of this script, used as a prefix in diagnostics.
me=${0##*/}

# fatal <msg>
# -----------
# Hand "<script>: error: <msg>" to `die' (provided by git-sh-setup), which
# is expected to print it on stderr and exit 1.
fatal()
{
  die "$me: error: $*"
}
warnings=0    # number of warnings issued so far
warn_msgs=    # newline-separated, timestamped copy of every warning
# warn <msg>
# ----------
# Print "<script>: warning: <msg>" on stderr, bump $warnings and append the
# message (prefixed with the current date) to $warn_msgs.
warn()
{
  printf '%s\n' "$me: warning: $*" >&2
  warnings=$(($warnings + 1))
  warn_msgs="$warn_msgs
($(date)) warning: $*"
}
# verb <msg>
# ----------
# Print <msg> on stdout when verbose mode is enabled ($opt_verbose is the
# command `:').  Always returns 0 when verbose mode is off.
verb()
{
  if $opt_verbose; then
    printf '%s\n' "$*"
  fi
}
# debug <msg>
# -----------
# Print <msg> on stdout when debug mode is enabled ($opt_debug is the
# command `:').  Always returns 0 when debug mode is off.
debug()
{
  if $opt_debug; then
    printf '%s\n' "$*"
  fi
}
131 # find_merge_parent <ref> <merge-line>
132 # ------------------------------------
133 # Return (in $merge_parent) the sha1 of the commit that has been merged in by
134 # <ref>. <merge-line> must be a line extracted from the commit message of
135 # <ref> and will be used to extract the SVN revision merged. For instance, if
136 # <ref> is a SVN merge of merge-line='Merge -r42:51 in branch foo', this
137 # function puts the sha1 of the first commit the revision of which is <= 51
138 # which happens to be in branch foo in $merge_parent.
139 # If the name of the branch being merged couldn't be found, $merge_parent
140 # contains 'unknown'.
141 find_merge_parent()
143 if $user_defined_merge; then
144 sed_tmp="s/^.*@$1:\\([1-9][0-9]*\\):[^@]*@.*\$/\\1/"
145 svn_merge_to=`echo "$opt_merge" | sed "$sed_tmp"`
146 else
147 # Find the first line that matches $merge_pattern, do the substitution and
148 # quit. Ignore all the other lines.
149 sed_tmp="s/.*$merge_pattern.*/\\1/"
150 svn_merge_to=`echo "$2" | sed "$sed_tmp"`
152 case $svn_merge_to in #(
153 '' | 0* | *[^0-9]*)
154 fatal "invalid SVN revision '$svn_merge_to' found in $1";;
155 esac
156 # Now $svn_merge_to is not necessarily a commit that took part of the
157 # merge. For instance, you can merge -r42:51 https://.../branches/foo
158 # even if the last commit in branch foo is at r46. So it's utterly
159 # important that we find the last commit on the branch being merged the
160 # revision of which must be <= $svn_merge_to (which is 51 in this example).
161 if $user_defined_merge; then
162 sed_tmp="s/^.*@$1:[^:@]*:\\([^@]*\\)@.*\$/\\1/"
163 svn_branch_name=`echo "$opt_merge" | sed "$sed_tmp"`
164 test -n "$svn_branch_name" || fatal "internal error in find_merge_parent"
165 else
166 extract_svn_branch_name "$2"
167 if test -z "$svn_branch_name"; then
168 merge_parent='unknown'
169 return 0
172 # Create a range to intelligently limit the match of rev-list. This will
173 # produce a RE that rules out all the impossible revision numbers (that is,
174 # the revisions >TO). e.g:
175 # 7 -> ([0-7])
176 # 42 -> (4[0-2]|[0-3][0-9]|[1-9])
177 # 123 -> (12[0-3]|1[0-1][0-9]|0[0-9][0-9]|[1-9][0-9]{0,1})
178 # 6951 -> (695[0-1]|69[0-4][0-9]|6[0-8][0-9][0-9]|[0-5][0-9][0-9][0-9]|[1-9][0-9]{0,2})
179 perl_tmp='$_ = "'"$svn_merge_to"'";
180 my $l = length($_);
181 my @r;
182 foreach my $i (0 .. $l - 1) {
183 /^(\d*)(\d)(\d{$i})$/;
184 my ($a, $b, $c) = ($1, int($2), $3);
185 if ($i != 0) {
186 # Avoid pitfalls e.g. 10[0-9] or 0[0-9][0-9] for 101
187 next if $b == 0 or ($b == 1 and $a eq "");
188 --$b;
190 $b = "[0-$b]" if $b;
191 $c =~ s/./[0-9]/g;
192 push(@r, "$a$b$c");
194 push(@r, "[1-9]" . ($l - 2 ? "[0-9]{0," . ($l - 2) . "}" : ""))
195 if $l > 1;
196 print "(" . join("|", @r) . ")";'
197 rev_range=`perl -we "$perl_tmp"`
198 sed_tmp='s/^ *git-svn-id: .*@\([0-9]*\) [-0-9a-f]*$/\1/p'
199 svn_merge_parent=`git rev-list --all -1 --header -E \
200 --grep="^ *git-svn-id: .*/$svn_branch_name@$rev_range [-0-9a-f]*\\$" \
201 | sed -n "$sed_tmp"`
202 case $svn_merge_parent in #(
203 '' | 0* | *[^0-9]*) fatal "invalid svn_merge_parent: '$svn_merge_parent'";;
204 esac
205 rv=$?
206 test $rv -eq 0 || fatal "perl returned $rv"
207 if $opt_verbose; then
208 if test "$svn_merge_to" -eq "$svn_merge_parent"; then
209 verb_tmp=
210 else
211 verb_tmp=" (in fact r$svn_merge_parent)"
214 verb " $1 is merging SVN r$svn_merge_to$verb_tmp from SVN $svn_branch_name"
215 # Now find the sha1 of the merge parent.
216 merge_parent=`git rev-list --all \
217 --grep="^ *git-svn-id: .*@$svn_merge_parent [-0-9a-f]*\\$"`
218 rv=$?
219 test $rv -eq 0 || fatal "git rev-list returned $rv"
# create_graft <ref> <merge-parent>
# ---------------------------------
# Add <merge-parent> as 2nd parent of the commit designated by <ref>, by
# appending a line to $graft_file.  Nothing is written when <merge-parent> is
# already a parent, when an identical graft already exists (then $nalready is
# incremented) or in dry-run mode.  $nconverted is incremented for every
# graft actually written.  A different pre-existing graft for <ref> is fatal.
create_graft()
{
  # --parents will print $1 along with its current parents.
  grafted_commit=$(git rev-list --no-walk --parents "$1")
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
  graft_merge_parent=$2

  case $grafted_commit in
    *"$graft_merge_parent"*)
      debug "  not grafting commit $1: $graft_merge_parent is already a parent ($grafted_commit)"
      return 0;;
  esac

  graft="$grafted_commit $graft_merge_parent"
  # The grafts file usually does not exist before the first graft: only
  # search it when it is there, so that grep does not complain.
  existing_graft=
  if test -f "$graft_file" \
     && existing_graft=$(grep -- "^$1" "$graft_file"); then
    if test x"$existing_graft" != x"$graft"; then
      fatal "$1 is already graft ($existing_graft)\
 and the graft is different than what I was going to graft ($graft)"
    fi
    nalready=$(($nalready + 1))
    debug "  not grafting commit $1: already properly grafted"
    return 0
  fi

  debug "  grafting commit $1: add parent $graft_merge_parent"
  if $opt_dryrun; then
    return 0
  fi
  nconverted=$(($nconverted + 1))
  printf '%s\n' "$graft" >>"$graft_file" \
    || fatal "Failed to add a graft in $graft_file"
}
# rm_original_refs
# ----------------
# Remove all the refs under refs/original: drop their entries from
# $GIT_DIR/packed-refs (when that file exists) and delete the
# $GIT_DIR/refs/original directory.
rm_original_refs()
{
  packed_refs="$GIT_DIR/packed-refs"
  if test -f "$packed_refs"; then
    # `sed -i' is not portable, so filter into a scratch file and copy the
    # result back over the original (which keeps its permissions).
    sed '/refs\/original\//d' "$packed_refs" >"$packed_refs.tmp$$" \
      && cat "$packed_refs.tmp$$" >"$packed_refs" \
      || warn "Failed to edit '$GIT_DIR/packed-refs' (sed returned $?)"
    rm -f "$packed_refs.tmp$$"
  fi
  rm -rf "$GIT_DIR/refs/original"
}
# rewrite_history
# ---------------
# Make *all* the grafts part of the actual history (with git filter-branch),
# then optionally repack/prune.  When some merge parents could not be found
# ($parent_unknown is not empty), list them and, if a rewrite is requested,
# ask for confirmation on stdin first.  Returns 1 when the answer cannot be
# read, 0 when the user declines or in dry-run mode.
rewrite_history()
{
  filter_branch='git filter-branch --parent-filter cat -- --all'
  if test -n "$parent_unknown"; then
    # De-duplicate the list and put it back on a single line.
    parent_unknown=$(printf '%s\n' "$parent_unknown" | tr ' ' '\n' \
                     | sort -u | xargs)
    printf '%s\n' "I could not find the merge parent or merged branch in the following:
$parent_unknown
If you know which revision they are merging from which branch, you can invoke
me again with --merge <ref>:<rev>:<branch> and I will do the magic for you."
    if $opt_rewrite; then
      # printf rather than `echo -n', which is not portable under /bin/sh.
      printf '%s' "Do you want to go ahead and rewrite the entire history\
 anyway? [y/N] "
      read -r answer || {
        warn "failed to read your answer... I'm not rewriting anything."
        return 1
      }
      case $answer in
        [yY]*)
          printf '%s\n' "Alright so I'll rewrite the history now.
Bear in mind that all the refs I printed (especially the ones above of which I
couldn't find the parents) will be (most likely) changed so you'll have to
figure out by yourself what they are to use them --merge"
          # Leave a few seconds to hit ^C.
          printf '%s' 'Rewriting history in'
          for i in 5 4 3 2 1; do
            printf '%s' " ... $i"
            sleep 1
          done
          printf '%s\n' ' ... 0'
          ;;
        *)
          printf '%s\n' "OK.  Use '$filter_branch' when you're ready or invoke me again."
          return 0;;
      esac
    fi
  fi

  if $opt_rewrite; then
    # Refresh all the timestamps.  I don't know why, they always change with
    # me (only the timestamps!) and git filter-branch will complain because
    # git diff-files will return differences (due to the timestamp change).
    # FIXME: Investigate why this script seems to touch the entire WC.
    # The weird thing is that it works even with a read-only WC.
    git status >/dev/null
    if $opt_dryrun; then
      return 0
    fi
    if test -f "$graft_file"; then
      # $filter_branch is deliberately unquoted: it holds a whole command.
      $filter_branch || fatal "git filter-branch returned $?"
      rm "$graft_file" || warn "Failed to rm $graft_file"
    else
      warn "No history rewriting necessary"
    fi
    # FIXME: Is it really necessary to repack if we didn't go through the
    # previous `if'?
    if $opt_repack; then
      if $opt_prune; then
        prune='--prune'
        rm_original_refs
      else
        prune=
      fi
      # $prune is deliberately unquoted: empty means "no argument at all".
      git gc $prune || warn "git gc $prune returned $?"
    fi
  else
    printf '%s\n' "$me: use '$filter_branch' to rewrite the entire history"
  fi
}
# doit <REF>
# ----------
# Find all the merges mentioned in the commit messages reachable from <REF>
# and make them become real Git merges (by grafting their merge parent).
# Commits that are skipped are stored in $skipped, commits whose merge
# parent could not be determined in $parent_unknown; $nmerge counts the
# merge commits considered.  Uses the file $tmp_buf as scratch space.
doit()
{
  refspec=$1
  verb "  >> Processing merges in the history of $refspec"

  git rev-list --no-walk "$refspec" >/dev/null \
    || fatal "'$refspec' does not seem to be a valid refspec"

  git rev-list --grep="$merge_pattern" "$refspec" >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git rev-list failed and returned $rv"
  while read -r commit; do
    # Explicit exclusions.  Pad the list with spaces so that its first and
    # last entries match too, and really skip the commit.
    case " $opt_exclude " in
      *" $commit "*)
        skipped="$skipped $commit"
        verb "  skipping $commit because it's listed in --exclude"
        continue;;
    esac

    merge_log=$(git log --no-walk "$commit")
    rv=$?
    test $rv -eq 0 || fatal "git log returned $rv"
    # Keep only the first line of the log that matches $merge_pattern.
    merge_line=$(printf '%s\n' "$merge_log" | sed "/$merge_pattern/!d;//q")

    case $opt_merge in
      *"@$commit:"*':'*'@'*)
        user_defined_merge=:;;
      *)
        user_defined_merge=false;;
    esac

    # Maybe skip the commit if it matches $exclude_pattern or
    # $log_exclude_pattern (in which case it's not a merge).  Commits named
    # in --merge are never subject to pattern-based exclusion.
    if $user_defined_merge; then
      :
    else
      if $has_exclude \
         && printf '%s\n' "$merge_line" \
            | grep -- "$exclude_pattern" >/dev/null; then
        skipped="$skipped $commit"
        verb "  skipping $commit whose log merge-line is: $merge_line"
        continue
      fi
      if $has_log_exclude \
         && printf '%s\n' "$merge_log" \
            | grep -- "$log_exclude_pattern" >/dev/null; then
        skipped="$skipped $commit"
        verb "  skipping $commit whose log is:"
        if $opt_verbose; then
          printf '%s\n' "$merge_log" | sed 's/^/    | /'
        fi
        continue
      fi
    fi

    nmerge=$(($nmerge + 1))
    verb "  $commit is a merge commit, log says:
  | $merge_line"

    find_merge_parent "$commit" "$merge_line"

    case $merge_parent in
      unknown)
        warn "could not find the merge parent of $commit"
        parent_unknown="$parent_unknown $commit"
        continue;;
      '' | *[^0-9a-f]*)
        fatal "invalid merge_parent: '$merge_parent'";;
    esac

    create_graft "$commit" "$merge_parent"
    test $? -eq 0 || fatal "failed to create a graft for commit $commit"
  done <"$tmp_buf"
}
# ------------------ #
# `main' starts here #
# ------------------ #

# Scratch file used to hold lists of commits/refs.
test -d "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not a directory"
test -w "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not writable"
tmp_buf=$(mktemp "$TMPDIR/$me.XXXXXX") \
  || fatal "failed to create a temporary file in '$TMPDIR'"
# Clean up temp file upon exit.  The trap body is single-quoted so that $?
# is read when the trap fires (i.e. it is the real exit status of the
# script) rather than when the trap is installed.
trap 'exit_status=$?; rm -f "$tmp_buf"; exit $exit_status' 0
# Parse the options passed to the script.
# Initialize the defaults
opt_all=false
opt_debug=false
opt_dryrun=false
# space-separated list of sha1 to exclude, each one surrounded by spaces
opt_exclude=
# '@'-separated list of ':'-separated triplets (sha1:rev:branch)
opt_merge=
opt_prune=false
opt_remote=false
opt_repack=:
opt_rewrite=:
opt_verbose=false

# -------- #
# `getopt' #
# -------- #
# The arguments were normalized by git-sh-setup according to OPTIONS_SPEC:
# options come first, then `--', then the refspecs.
while test $# != 0
do
  case $1 in
    -a | --all)
      opt_all=:;;
    -d | --debug)
      opt_debug=:;;
    -H | --no-rewrite)
      opt_rewrite=false;;
    -m | --merge)
      shift
      # <refspec>:<rev>:<branch>
      refspec=${1%%:*}
      # Resolve the refspec to a full sha1: this is what the commits are
      # compared against later on.
      merge_sha1=$(git rev-list --no-walk "$refspec")
      test -n "$merge_sha1" \
        || fatal "invalid refspec '$refspec' in --merge argument"
      svn_merge_to=${1%:*}
      svn_merge_to=${svn_merge_to#*:}
      case $svn_merge_to in
        '' | 0* | *[^0-9]*)
          fatal "invalid SVN revision '$svn_merge_to' in --merge argument";;
      esac
      svn_branch_name=${1#*:}
      svn_branch_name=${svn_branch_name#*:}
      case $svn_branch_name in
        '')
          fatal "empty SVN branch name in --merge argument";;
        *' '*)
          fatal "whitespace unsupported in SVN branch name in --merge argument";;
        *'@'*)
          fatal "at (@) unsupported in SVN branch name in --merge argument";;
      esac
      opt_merge="$opt_merge@$merge_sha1:$svn_merge_to:$svn_branch_name@"
      ;;
    -n | --dry-run)
      opt_dryrun=:;;
    -p | --prune)
      opt_prune=:;;
    -P | --no-repack)
      opt_repack=false;;
    -r | --remote)
      opt_remote=:;;
    -v | --verbose)
      opt_verbose=:;;
    -x | --exclude)
      shift
      # Store the full sha1, surrounded by spaces so that every entry of
      # the list can be matched as " <sha1> ".
      exclude_sha1=$(git rev-list --no-walk "$1")
      test -n "$exclude_sha1" \
        || fatal "invalid refspec '$1' in --exclude argument"
      opt_exclude="$opt_exclude $exclude_sha1 "
      ;;
    --)
      shift; break;;
    *)
      usage;;
  esac
  shift
done
# We use rev-list --all a lot.  When we finish, git filter-branch saves all
# the original refs under refs/original.  These will be selected by rev-list
# --all which is something we want to avoid.  So we bail out when
# refs/original exists (or clean it up when --prune was given).
if test -n "$(git for-each-ref refs/original)"; then
  if $opt_prune; then
    rm_original_refs
  else
    fatal "There are some refs under refs/original which could be
the refs saved by a previous run of myself.  This can also occur if you used
git filter-branch (which I personally do).  Please get rid of them if you want
to re-run me or re-run me with the --prune options and I'll do it for you."
  fi
fi
# --all: append every local branch to the list of refspecs to process.
if $opt_all; then
  git for-each-ref --shell --format='ref=%(refname)' refs/heads >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
  while read -r line
  do
    # $line is a shell-quoted assignment produced by --shell.
    eval "$line"
    set -- "$@" "$ref"
  done <"$tmp_buf"
fi

# --remote: make sure every remote branch has a local counterpart and append
# that local branch to the list of refspecs to process.
if $opt_remote; then
  git for-each-ref --shell --format='ref=%(refname)' refs/remotes >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
  while read -r line
  do
    eval "$line"
    branch=$(basename "$ref")
    case $branch in
      HEAD) # Skip branches named `HEAD' (which does happen)
        continue;; # because they create ambiguities.
    esac
    # if the local $branch does not already exist, we create one
    exists=$(git rev-list --no-walk refs/heads/"$branch" 2>/dev/null)
    if test -z "$exists"; then # the $branch does not locally exist
      verb "creating branch '$branch' from '$ref'"
      git branch "$branch" "$ref" \
        || fatal "could not create branch '$branch' from '$ref'"
      set -- "$@" "refs/heads/$branch"
    else # there already is a local $branch
      sha1=$(git rev-list --no-walk "$ref")
      # Maybe the existing local $branch is identical to the remote $ref?
      if test "$sha1" = "$exists"; then # OK, local = remote
        verb "branch '$branch' is already properly initialized to '$ref'"
        set -- "$@" "refs/heads/$branch"
      else # KO, local != remote
        warn "there already exists a local branch '$branch'
and it is at $exists whereas the remote branch '$ref'
is at $sha1 so I'm skipping it..."
      fi
    fi
  done <"$tmp_buf"
fi
# No refspec given => work on HEAD
if test $# -eq 0; then
  set -- HEAD
fi
graft_file="$GIT_DIR/info/grafts"

# Pattern-based exclusions are only active when their pattern is not empty.
if test -z "$exclude_pattern"; then
  has_exclude=false
else
  has_exclude=:
fi

if test -z "$log_exclude_pattern"; then
  has_log_exclude=false
else
  has_log_exclude=:
fi

totalmerge=0
totalconverted=0
nbranch=0
skipped=        # space separated list of commit that we skipped
parent_unknown= # space separated list of commit of which we couldn't figure
                # out the merge parent
for refspec
do
  nmerge=0      # number of merges seen for $refspec
  nconverted=0  # number of them we actually grafted
  nalready=0    # number of them that were already grafted/imported
  doit "$refspec"

  if test "$nalready" -eq 0; then
    alrmsg=
  else
    alrmsg=" ($nalready already converted)"
  fi
  printf '%s\n' ">>> processed $nconverted/$nmerge merges$alrmsg in $refspec"

  totalmerge=$(($totalmerge + $nmerge))
  totalconverted=$(($totalconverted + $nconverted))
  nbranch=$(($nbranch + 1))
done

rewrite_history

if test "$warnings" -ne 0; then
  warn "job completed with $warnings warnings:$warn_msgs"
fi
# Skipped commits are never counted in $totalmerge, so report them whenever
# there are some rather than when the totals differ.
if test -n "$skipped"; then
  skipped=$(printf '%s\n' "$skipped" | tr ' ' '\n' | sort -u | xargs)
  printf '%s\n' "The following commits have been skipped: $skipped"
fi
# FIXME: Print a warning for each unused --merge argument.
printf '%s\n' "Done.  Processed $totalconverted/$totalmerge merges in $nbranch branches"