Add more options.
[svn-merge2git.git] / svn-merge2git.sh
blobe8a83e7933e4fb16f06f45dd739307d544c9cad1
1 #!/bin/sh
2 # Copyright (c) 2008 Benoit Sigoure <tsuna@lrde.epita.fr>
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Usage text and option table. NOTE(review): presumably consumed by
# git-sh-setup when it is sourced below -- confirm against its documentation.
17 OPTIONS_SPEC="\
18 svn-merge2git [options] [refspec]
20 a,all Do the work on all local the branches
21 d,debug Turn on debug mode (useful if you're hacking the script)
22 H,no-rewrite Do not rewrite the entire history (leave the grafts)
23 n,dry-run Do the entire processing without actually changing anything
24 p,prune Clean up everything after rewriting the history (not undo-able!)
25 P,no-repack Do not repack the resulting repository (implied by -H)
26 r,remote Include remote branches (create a local branch for each of them)
27 v,verbose Be more verbose
29 SUBDIRECTORY_OK=Yes
30 . git-sh-setup
31 cd_to_toplevel
# Use /tmp when TMPDIR is unset (an empty but set TMPDIR is left alone), and
# export it to the commands run by this script.
33 : ${TMPDIR=/tmp}
34 export TMPDIR
36 # BRE (Basic RegExp) compatible with `git rev-list --grep' and `sed'. The RE
37 # *must* capture the revision merged in its first group.
38 merge_pattern='[Mm]erge.*[0-9][0-9]*:\([0-9][0-9]*\)'
40 # BRE used to exclude commits: a commit is skipped when the line of its log
41 # that matches $merge_pattern also matches this pattern.
42 exclude_pattern='Finish'
44 # BRE which is used to exclude matches in the commit log of potential merge
45 # commits.
46 log_exclude_pattern='Finish.*merge'
# extract_svn_branch_name <string>
# --------------------------------
# Find in <string> the name of an SVN branch and put it in $svn_branch_name.
# Assumes SVN "stdlayout": the name is looked up after `/branches/' or
# `/tags/' (a warning is emitted for tags), or is `trunk' when <string>
# contains `/trunk'.  $svn_branch_name is left empty when <string> contains
# none of these.  Calls `fatal' when <string> is empty.
extract_svn_branch_name()
{
  # Reset the result first: without this, a <string> that matches none of the
  # patterns below would silently keep the value computed by a previous call,
  # and callers testing for an empty $svn_branch_name would never see it.
  svn_branch_name=
  case $1 in
    '')
      fatal 'extract_svn_branch_name called with empty argument';;
    */branches/*)
      extract_svn_branch_name_ 'branches' "$1";;
    */tags/*)
      extract_svn_branch_name_ 'tags' "$1"
      warn "found a merge from tag '$svn_branch_name'";;
    */trunk*)
      svn_branch_name='trunk';;
  esac
}
# extract_svn_branch_name_ <kind> <string>
# ----------------------------------------
# Helper of extract_svn_branch_name (defined above) to factor some code.
# <kind> is the directory that introduces the name, probably either
# 'branches' or 'tags' (for SVN "stdlayout").
# The result, `<kind>/<name>', is stored in $svn_branch_name.
extract_svn_branch_name_()
{
  # XXX: Assumes that a branch name does not contain whitespace (the name is
  # taken to extend up to the next space).  Fragile.
  sed_tmp="s|.*/\\($1/[^ ]*\\).*|\\1|"
  svn_branch_name=$(echo "$2" | sed "$sed_tmp")
}
# Name under which the script was invoked, used to prefix diagnostics.
me=$(basename "$0")

# fatal <msg>
# -----------
# Report <msg> as an error, prefixed with the script name, by handing it to
# `die' (not defined in this file), i.e. print <msg> on stderr and exit 1.
fatal()
{
  die "$me: error: $*"
}
# Number of warnings emitted so far.
warnings=0

# warn <msg>
# ----------
# Print <msg> on stderr, prefixed with the script name, and count it in
# $warnings.
warn()
{
  echo >&2 "$me: warning: $*"
  warnings=$((warnings + 1))
}
# verb <msg>
# ----------
# Print <msg> on stdout, but only when verbose mode is enabled.
# $opt_verbose holds a command name (`:' or `false'); when it fails, its
# exit status is returned and nothing is printed.
verb()
{
  $opt_verbose || return
  echo "$*"
}
# debug <msg>
# -----------
# Print <msg> on stdout, but only when debug mode is enabled.
# $opt_debug holds a command name (`:' or `false'); when it fails, its exit
# status is returned and nothing is printed.
debug()
{
  $opt_debug || return
  echo "$*"
}
# find_merge_parent <ref> <merge-line>
# ------------------------------------
# Return (in $merge_parent) the sha1 of the commit that has been merged in by
# <ref>.  <merge-line> must be a line extracted from the commit message of
# <ref> and will be used to extract the SVN revision merged.  For instance, if
# <ref> is a SVN merge of merge-line='Merge -r42:51 in branch foo', this
# function puts in $merge_parent the sha1 of the first commit of branch foo
# whose revision is <= 51.
# If the name of the branch being merged couldn't be found, $merge_parent
# contains 'unknown'.
# Calls `fatal' when the revision cannot be extracted from <merge-line>, when
# perl or git rev-list fail, or when no suitable SVN revision is found.
find_merge_parent()
{
  # Find the first line that matches $merge_pattern, do the substitution and
  # quit.  Ignore all the other lines.
  sed_tmp="s/.*$merge_pattern.*/\\1/"
  svn_merge_to=`echo "$2" | sed "$sed_tmp"`
  case $svn_merge_to in #(
    '' | *[^0-9]*)
      fatal "invalid SVN revision '$svn_merge_to' found in $1";;
  esac
  # Now $svn_merge_to is not necessarily a commit that took part of the
  # merge.  For instance, you can merge -r42:51 https://.../branches/foo
  # even if the last commit in branch foo is at r46.  So it's utterly
  # important that we find the last commit on the branch being merged the
  # revision of which must be <= $svn_merge_to (which is 51 in this example).
  extract_svn_branch_name "$2"
  if test -z "$svn_branch_name"; then
    merge_parent='unknown'
    return 0
  fi
  # Create a range to intelligently limit the match of rev-list.  This will
  # produce a RE that rules out all the impossible revision numbers (that is,
  # the revisions >TO).  e.g:
  # 7 -> ([0-7])
  # 42 -> (4[0-2]|[0-3][0-9]|[1-9])
  # 123 -> (12[0-3]|1[0-1][0-9]|0[0-9][0-9]|[1-9][0-9]{0,1})
  # 6951 -> (695[0-1]|69[0-4][0-9]|6[0-8][0-9][0-9]|[0-5][0-9][0-9][0-9]|[1-9][0-9]{0,2})
  perl_tmp='$_ = "'"$svn_merge_to"'";
    my $l = length($_);
    my @r;
    foreach my $i (0 .. $l - 1) {
      /^(\d*)(\d)(\d{$i})$/;
      my ($a, $b, $c) = ($1, int($2), $3);
      if ($i != 0) {
        # Avoid pitfalls e.g. 10[0-9] or 0[0-9][0-9] for 101
        next if $b == 0 or ($b == 1 and $a eq "");
        --$b;
      }
      $b = "[0-$b]" if $b;
      $c =~ s/./[0-9]/g;
      push(@r, "$a$b$c");
    }
    push(@r, "[1-9]" . ($l - 2 ? "[0-9]{0," . ($l - 2) . "}" : ""))
      if $l > 1;
    print "(" . join("|", @r) . ")";'
  rev_range=`perl -we "$perl_tmp"`
  # Check perl's exit status right away: any later command would overwrite $?
  # and a perl failure would only surface as a misleading "invalid
  # svn_merge_parent" error further down.
  rv=$?
  test $rv -eq 0 || fatal "perl returned $rv"
  # Most recent commit whose git-svn-id names the merged branch at one of the
  # allowed revisions; only its SVN revision number is kept.
  sed_tmp='s/^ *git-svn-id: .*@\([0-9]*\) [-0-9a-f]*$/\1/p'
  svn_merge_parent=`git rev-list --all -1 --header -E \
    --grep="^ *git-svn-id: .*/$svn_branch_name@$rev_range [-0-9a-f]*\\$" \
    | sed -n "$sed_tmp"`
  case $svn_merge_parent in #(
    '' | *[^0-9]*) fatal "invalid svn_merge_parent: '$svn_merge_parent'";;
  esac
  if $opt_verbose; then
    if test "$svn_merge_to" -eq "$svn_merge_parent"; then
      verb_tmp=
    else
      verb_tmp=" (in fact r$svn_merge_parent)"
    fi
  fi
  verb " $1 is merging SVN r$svn_merge_to$verb_tmp from branch $svn_branch_name"
  # Now find the sha1 of the merge parent.
  merge_parent=`git rev-list --all \
    --grep="^ *git-svn-id: .*@$svn_merge_parent [-0-9a-f]*\\$"`
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
}
# create_graft <ref> <merge-parent>
# ---------------------------------
# Add <merge-parent> as 2nd parent of the commit designated by <ref>, by
# appending a line to $graft_file.  Nothing is written when <merge-parent> is
# already a parent of <ref>, when the same graft is already recorded, or in
# dry-run mode.  $nconverted is incremented for each graft actually written.
# Calls `fatal' when git rev-list fails, when a different graft already
# exists for <ref>, or when $graft_file cannot be written.
create_graft()
{
  # --parents will print $1 along with its current parents.
  grafted_commit=`git rev-list --no-walk --parents "$1"`
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
  graft_merge_parent=$2

  case $grafted_commit in #(
    *"$graft_merge_parent"*)
      debug " not grafting commit $1: $graft_merge_parent is already a parent ($grafted_commit)"
      return 0;;
  esac

  graft="$grafted_commit $graft_merge_parent"
  # Only look for an existing graft when the graft file exists: it does not
  # on a first run (and never gets created in dry-run mode), in which case
  # grep would print a spurious "No such file or directory" error.
  if test -f "$graft_file" && existing_graft=`grep "^$1" "$graft_file"`; then
    if test x"$existing_graft" != x"$graft"; then
      fatal "$1 is already graft ($existing_graft)\
 and the graft is different than what I was going to graft ($graft)"
    fi
    debug " not grafting commit $1: already properly grafted"
    return 0
  fi
  debug " grafting commit $1: add parent $graft_merge_parent"
  $opt_dryrun && return 0
  nconverted=$(($nconverted + 1))
  echo >>"$graft_file" "$graft" \
    || fatal "Failed to add a graft in $graft_file"
}
# rm_original_refs
# ----------------
# Remove all the refs under refs/original: the lines mentioning them in
# $GIT_DIR/packed-refs (when that file exists) and the whole
# $GIT_DIR/refs/original directory.  A failure to edit packed-refs only
# produces a warning.
rm_original_refs()
{
  if test -f "$GIT_DIR/packed-refs"; then
    # `sed -i' is not specified by POSIX and BSD sed requires an argument
    # after it, so filter into a temporary file and move it back instead.
    rm_refs_tmp="$GIT_DIR/packed-refs.$$"
    if sed '/refs\/original\//d' "$GIT_DIR/packed-refs" >"$rm_refs_tmp"; then
      mv -f "$rm_refs_tmp" "$GIT_DIR/packed-refs" || {
        warn "Failed to edit '$GIT_DIR/packed-refs' (mv returned $?)"
        rm -f "$rm_refs_tmp"
      }
    else
      # $? still holds the status of the failed sed (or redirection) here.
      rm_refs_rv=$?
      rm -f "$rm_refs_tmp"
      warn "Failed to edit '$GIT_DIR/packed-refs' (sed returned $rm_refs_rv)"
    fi
  fi
  rm -rf "$GIT_DIR/refs/original"
}
# rewrite_history
# ---------------
# Make *all* the grafts part of the actual history by running
# git filter-branch over all refs, then optionally repack (and prune) the
# repository.  Does nothing in dry-run mode; when rewriting is disabled, only
# tells the user which command would do it.
rewrite_history()
{
  if $opt_dryrun; then
    return 0
  fi
  filter_branch='git filter-branch --parent-filter cat -- --all'

  if ! $opt_rewrite; then
    echo "$me: use '$filter_branch' to rewrite the entire history"
    return
  fi

  # Refresh all the timestamps.  I don't know why, they always change with
  # me (only the timestamps!) and git filter-branch will complain because
  # git diff-files will return differences (due to the timestamp change).
  # FIXME: Investigate why this script seems to touch the entire WC.
  git status >/dev/null
  if test -f "$graft_file"; then
    $filter_branch || fatal "git filter-branch returned $?"
    rm "$graft_file" || warn "Failed to rm $graft_file"
  else
    warn "No history rewriting necessary"
  fi

  # FIXME: Is it really necessary to repack if we didn't go through the
  # previous `if'?
  if $opt_repack; then
    prune=
    if $opt_prune; then
      prune='--prune'
      rm_original_refs
    fi
    git gc $prune || warn "git gc $prune returned $?"
  fi
}
276 # doit <REF>
277 # ----------
278 # Find all the merges mentioned in the commit messages of the history of
279 # <REF> and make them become real Git merges (by creating grafts).
# Uses "$tmp_buf" as scratch file for the list of candidate commits,
# increments $nmerge for each commit retained as a merge, and calls `fatal'
# when <REF> is invalid or when a git command fails.
280 doit()
282 refspec=$1
283 verb " >> Processing merges in the history of $refspec"
# Make sure <REF> designates an existing commit before going any further.
285 git rev-list --no-walk "$refspec" >/dev/null \
286 || fatal "'$refspec' does not seem to be a valid refspec"
# List the commits whose message matches $merge_pattern, one sha1 per line.
288 git rev-list --grep="$merge_pattern" "$refspec" >"$tmp_buf"
289 rv=$?
290 test $rv -eq 0 || fatal "git rev-list failed and returned $rv"
291 while read commit; do
292 merge_log=`git log --no-walk "$commit"`
293 rv=$?
294 test $rv -eq 0 || fatal "git log returned $rv"
# Keep only the first line of the log that matches $merge_pattern.
295 merge_line=`echo "$merge_log" | sed "/$merge_pattern/!d;//q"`
297 # Maybe skip the commit if it matches $exclude_pattern or
298 # $log_exclude_pattern (in which case it's not a merge)
299 if $has_exclude \
300 && echo "$merge_line" | grep -- "$exclude_pattern" >/dev/null; then
301 verb " skipping $commit whose log merge-line is: $merge_line"
302 continue
304 if $has_log_exclude \
305 && echo "$merge_log" | grep -- "$log_exclude_pattern" >/dev/null; then
306 verb " skipping $commit whose log is:"
307 $opt_verbose && echo "$merge_log" | sed 's/^/ | /'
308 continue
311 nmerge=$(($nmerge + 1))
312 verb " $commit is a merge commit, log says:
313 | $merge_line"
# Sets $merge_parent to a sha1, or to 'unknown' when the merged branch could
# not be identified.
315 find_merge_parent "$commit" "$merge_line"
317 case $merge_parent in #(
318 unknown)
319 warn "could not find the merge parent of $commit"
320 continue;; #(
321 '' | *[^0-9a-f]*)
322 fatal "invalid merge_parent: '$merge_parent'";;
323 esac
325 create_graft "$commit" "$merge_parent"
326 test $? -eq 0 || fatal "failed to create a graft for commit $commit"
327 done <"$tmp_buf"
330 # ------------------ #
331 # `main' starts here #
332 # ------------------ #
test -d "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not a directory"
test -w "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not writable"
# Scratch file shared by the rest of the script.
tmp_buf=`mktemp "$TMPDIR/$me.XXXXXX"` \
  || fatal "could not create a temporary file in '$TMPDIR'"
# Clean up temp file upon exit.  The trap action is single-quoted on purpose:
# $? must be expanded when the trap runs (to preserve the real exit status of
# the script), not when it is installed (where it would always be 0), and
# "$tmp_buf" stays properly quoted even if $TMPDIR contains whitespace.
trap 'exit_status=$?; rm -f "$tmp_buf"; exit $exit_status' 0
340 # Parse the options passed to the script.
341 # Initialize the defaults
# Each opt_* variable holds a command name (`:' for true, `false' for false)
# so that it can be run directly as a condition, e.g. `if $opt_all; then'.
# Repacking and history rewriting are enabled by default; everything else is
# disabled.
342 opt_all=false
343 opt_debug=false
344 opt_dryrun=false
345 opt_prune=false
346 opt_remote=false
347 opt_repack=:
348 opt_rewrite=:
349 opt_verbose=false
351 # -------- #
352 # `getopt' #
353 # -------- #
# Consume the leading options one at a time; what remains in "$@" after the
# loop is the list of refspecs to process.
354 while test $# != 0
356 case $1 in #(
357 -a | --all)
358 opt_all=:;; #(
359 -d | --debug)
360 opt_debug=:;; #(
361 -H | --no-rewrite)
362 opt_rewrite=false;; #(
363 -n | --dry-run)
364 opt_dryrun=:;; #(
365 -p | --prune)
366 opt_prune=:;; #(
367 -P | --no-repack)
368 opt_repack=false;; #(
369 -r | --remote)
370 opt_remote=:;; #(
371 -v | --verbose)
372 opt_verbose=:;; #(
# NOTE(review): the patterns guarding the next two branches are not visible
# in this extract -- presumably `--)' (end of options) and `*)' (anything
# else, which prints the usage); confirm against the original file.
374 shift; break;; #(
376 usage;; #(
377 esac
378 shift
379 done
381 # We use rev-list --all a lot. When we finish, git filter-branch saves all
382 # the original refs under refs/original. These would be selected by
383 # rev-list --all, which is something we want to avoid. So when refs/original
384 # exists we either remove it (with --prune) or bail out.
385 if test -n "`git for-each-ref refs/original`"; then
386 if $opt_prune; then
387 rm_original_refs
388 else
389 fatal "There are some refs under refs/original which could be
390 the refs saved by a previous run of myself. This can also occur if you used
391 git filter-branch (which I personally do). Please get rid of them if you want
392 to re-run me or re-run me with the --prune options and I'll do it for you."
# With --all, append every ref found under refs/heads to the positional
# parameters, i.e. to the list of refspecs to process.
396 if $opt_all; then
# --shell makes git quote the ref name so that each output line is a valid
# shell assignment (ref='...') which is safe to `eval' below.
397 git for-each-ref --shell --format='ref=%(refname)' refs/heads >"$tmp_buf"
398 rv=$?
399 test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
400 while read line
402 eval "$line"
403 set "$@" "$ref"
404 done <"$tmp_buf"
# With --remote, make sure each ref under refs/remotes has a local branch of
# the same (base) name, creating it when needed.
407 if $opt_remote; then
408 git for-each-ref --shell --format='ref=%(refname)' refs/remotes >"$tmp_buf"
409 rv=$?
410 test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
411 while read line
413 eval "$line"
# Only the last path component of the ref is kept as local branch name.
# NOTE(review): remote refs that differ only by their leading components
# (e.g. two remotes, or names containing `/') map to the same local branch --
# confirm this is acceptable.
414 branch=`basename "$ref"`
415 case $branch in #(
416 HEAD) # Skip branches named `HEAD' (which does happen)
417 continue;; # because they create ambiguities.
418 esac
419 # if the local $branch does not already exist, we create one
420 exists=`git rev-list --no-walk refs/heads/"$branch" 2>/dev/null`
421 if test -z "$exists"; then # the $branch does not locally exist
422 verb "creating branch '$branch' from '$ref'"
# NOTE(review): a branch created here is not appended to "$@", unlike a
# branch that already existed and matches the remote (below) -- confirm
# whether freshly created branches are meant to be processed in this run.
423 git branch "$branch" "$ref" \
424 || fatal "could not create branch '$branch' from '$ref'"
425 else # there already is a local $branch
426 sha1=`git rev-list --no-walk "$ref"`
427 # Maybe the existing local $branch is identical to the remote $ref?
428 if test "$sha1" = "$exists"; then # OK, local = remote
429 verb "branch '$branch' is already properly initialized to '$ref'"
430 set "$@" "refs/heads/$branch"
431 else # KO, local != remote
432 warn "there already exists a local branch '$branch'
433 and it is at $exists whereas the remote branch '$ref'
434 is at $sha1 so I'm skipping it..."
437 done <"$tmp_buf"
440 # No refspec given => work on HEAD
441 test -z "$*" && set HEAD
# File in which the grafts (a commit followed by its parents) are recorded.
442 graft_file="$GIT_DIR/info/grafts"
# Pre-compute whether each exclusion pattern is in use so that the
# per-commit loop only has to run `$has_exclude' / `$has_log_exclude'.
444 if test -z "$exclude_pattern"; then
445 has_exclude=false
446 else
447 has_exclude=:
450 if test -z "$log_exclude_pattern"; then
451 has_log_exclude=false
452 else
453 has_log_exclude=:
# Process each refspec in turn.  $nmerge and $nconverted are per-refspec
# counters, reset here and accumulated into the totals after each refspec.
456 totalmerge=0
457 totalconverted=0
458 for refspec
460 nconverted=0
461 nmerge=0
462 doit "$refspec"
463 echo ">>> processed $nconverted/$nmerge merges in $refspec"
464 totalmerge=$(($totalmerge + $nmerge))
465 totalconverted=$(($totalconverted + $nconverted))
466 done
# Turn the recorded grafts into real history (unless disabled by options).
468 rewrite_history
470 echo "Done. Processed $totalconverted/$totalmerge merges"
471 test $warnings -eq 0 || warn "job completed with $warnings warnings"