Find the merge parent faster.
[svn-merge2git.git] / svn-merge2git.sh
blobcdea480ff3eae3d2f68561ea4e375c9bd33a5a3b
1 #!/bin/sh
2 # Copyright (c) 2008 Benoit Sigoure <tsuna@lrde.epita.fr>
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Option specification handed to git-sh-setup: a usage line, a `--'
# separator, then one `short,long  description' entry per option.
# NOTE(review): the `--' line and the closing quote are missing from the
# extracted listing and have been restored here -- confirm against the
# real file.
OPTIONS_SPEC="\
svn-merge2git [options] [refspec]
--
a,all      Do the work on all the local branches
d,debug    Turn on debug mode (useful if you're hacking the script)
n,dry-run  Do the entire processing without actually changing anything
r,remote   Include remote branches (create a local branch for each of them)
v,verbose  Be more verbose
"
SUBDIRECTORY_OK=Yes
. git-sh-setup
cd_to_toplevel

# Fall back to /tmp when TMPDIR is unset *or* empty; an empty value would
# otherwise only be caught later as "not a directory".
: "${TMPDIR:=/tmp}"
export TMPDIR
# Basic regular expression (BRE) understood by both `git rev-list --grep'
# and `sed'.  Its first group *must* capture the revision that was merged
# (the number following the colon).
merge_pattern='[Mm]erge.*[0-9][0-9]*:\([0-9][0-9]*\)'

# BRE: a commit is skipped when the line of its log that matches
# $merge_pattern also matches this pattern.  Leave empty to disable.
exclude_pattern='Finish'

# BRE: a potential merge commit is skipped when any part of its log matches
# this pattern.  Leave empty to disable.
log_exclude_pattern='Finish.*merge'
# extract_svn_branch_name <string>
# --------------------------------
# Find in <string> the name of a SVN branch and put it in $svn_branch_name
# (`trunk', `branches/...' or `tags/...').  Assumes SVN "stdlayout".
# $svn_branch_name is left empty when <string> names no branch.
# Calls fatal when <string> is empty and warn when the branch is a tag.
extract_svn_branch_name()
{
  # Forget the result of any previous call: the `case' below has no
  # catch-all arm, so without this reset a <string> that names no branch
  # would silently inherit the branch found for an earlier commit.
  svn_branch_name=
  case $1 in
    '')
      fatal 'extract_svn_branch_name called with empty argument';;
    */branches/*)
      extract_svn_branch_name_ 'branches' "$1";;
    */tags/*)
      extract_svn_branch_name_ 'tags' "$1"
      warn "found a merge from tag '$svn_branch_name'";;
    */trunk*)
      svn_branch_name='trunk';;
  esac
}
# extract_svn_branch_name_ <kind> <string>
# ----------------------------------------
# Worker of extract_svn_branch_name, factoring the code shared by its arms.
# <kind> is probably either 'branches' or 'tags' (for SVN "stdlayout").
# Store in $svn_branch_name the part of <string> made of `<kind>/' and the
# non-space characters that follow it.  When <string> has no `/<kind>/'
# component, sed leaves it untouched and it is stored as is.
extract_svn_branch_name_()
{
  # XXX: Assumes that a branch name does not contain whitespace.  Fragile.
  svn_branch_name=$(echo "$2" | sed "s|.*/\\($1/[^ ]*\\).*|\\1|")
}
# Base name of the script as invoked; prefixes every diagnostic.
me=$(basename "$0")

# fatal <msg>
# -----------
# Report <msg> as an error by handing it to `die'.
# NOTE(review): `die' is not defined in this file; it is presumably the
# helper from git-sh-setup that prints on stderr and exits 1.
fatal()
{
  die "${me}: error: $*"
}
# Number of warnings issued so far.
warnings=0

# warn <msg>
# ----------
# Print <msg> on stderr, prefixed with the script name, and count it in
# $warnings.
warn()
{
  >&2 echo "$me: warning: $*"
  warnings=$((warnings + 1))
}
# verb <msg>
# ----------
# Print <msg> on stdout when verbose mode is enabled.  $opt_verbose holds a
# command name (`:' or `false') that is run as the condition.
verb()
{
  if $opt_verbose; then
    echo "$*"
  else
    # Hand back the status of $opt_verbose itself when nothing is printed.
    return $?
  fi
}
# debug <msg>
# -----------
# Print <msg> on stdout when debug mode is enabled.  $opt_debug holds a
# command name (`:' or `false') that is run as the condition.
debug()
{
  if $opt_debug; then
    echo "$*"
  else
    # Hand back the status of $opt_debug itself when nothing is printed.
    return $?
  fi
}
# find_merge_parent <ref> <merge-line>
# ------------------------------------
# Return (in $merge_parent) the sha1 of the commit that has been merged in by
# <ref>.  <merge-line> must be a line extracted from the commit message of
# <ref> and will be used to extract the SVN revision merged.  For instance, if
# <ref> is a SVN merge of merge-line='Merge -r42:51 in branch foo', this
# function puts in $merge_parent the sha1 of the most recent commit of branch
# foo whose revision is <= 51.
# If the name of the branch being merged couldn't be found, $merge_parent
# contains 'unknown'.
# Calls fatal when the revision cannot be extracted from <merge-line>, when
# no matching git-svn commit is found, or when perl / git rev-list fail.
find_merge_parent()
{
  # Extract the merged revision (first group of $merge_pattern) from the
  # merge line.
  sed_tmp="s/.*$merge_pattern.*/\\1/"
  svn_merge_to=`echo "$2" | sed "$sed_tmp"`
  case $svn_merge_to in #(
    '' | *[^0-9]*)
      fatal "invalid SVN revision '$svn_merge_to' found in $1";;
  esac
  # Now $svn_merge_to is not necessarily a commit that took part of the
  # merge.  For instance, you can merge -r42:51 https://.../branches/foo
  # even if the last commit in branch foo is at r46.  So it's utterly
  # important that we find the last commit on the branch being merged the
  # revision of which must be <= $svn_merge_to (which is 51 in this example).
  extract_svn_branch_name "$2"
  if test -z "$svn_branch_name"; then
    merge_parent='unknown'
    return 0
  fi
  # Create a range to intelligently limit the match of rev-list.  This will
  # produce a RE that rules out all the impossible revision numbers (that is,
  # the revisions >TO).  e.g:
  #      7 -> ([0-7])
  #     42 -> (4[0-2]|[0-3][0-9]|[1-9])
  #    123 -> (12[0-3]|1[0-1][0-9]|[1-9][0-9]{0,1})
  #   6951 -> (695[0-1]|69[0-4][0-9]|6[0-8][0-9][0-9]|[0-5][0-9][0-9][0-9]|[1-9][0-9]{0,2})
  perl_tmp='$_ = "'"$svn_merge_to"'";
  my $l = length($_);
  my @r;
  foreach my $i (0 .. $l - 1) {
    /^(\d*)(\d)(\d{$i})$/;
    my ($a, $b, $c) = ($1, int($2), $3);
    if ($i != 0) {
      # Avoid pitfalls e.g. 10[0-9] or 0[0-9][0-9] for 101
      next if $b == 0 or ($b == 1 and $a eq "");
      --$b;
    }
    $b = "[0-$b]" if $b;
    $c =~ s/./[0-9]/g;
    push(@r, "$a$b$c");
  }
  push(@r, "[1-9]" . ($l - 2 ? "[0-9]{0," . ($l - 2) . "}" : ""))
    if $l > 1;
  print "(" . join("|", @r) . ")";'
  rev_range=`perl -we "$perl_tmp"`
  # Check perl's status right away: any later command would overwrite $?.
  rv=$?
  test $rv -eq 0 || fatal "perl returned $rv"

  # Most recent commit of the merged branch whose SVN revision is in range;
  # -E is needed because $rev_range uses ERE syntax.
  sed_tmp='s/^ *git-svn-id: .*@\([0-9]*\) [-0-9a-f]*$/\1/p'
  svn_merge_parent=`git rev-list --all -1 --header -E \
    --grep="^ *git-svn-id: .*/$svn_branch_name@$rev_range [-0-9a-f]*\\$" \
    | sed -n "$sed_tmp"`
  case $svn_merge_parent in #(
    '' | *[^0-9]*) fatal "invalid svn_merge_parent: '$svn_merge_parent'";;
  esac
  if $opt_verbose; then
    if test "$svn_merge_to" -eq "$svn_merge_parent"; then
      verb_tmp=
    else
      verb_tmp=" (in fact r$svn_merge_parent)"
    fi
  fi
  verb " $1 is merging SVN r$svn_merge_to$verb_tmp from branch $svn_branch_name"
  # Now find the sha1 of the merge parent.  A single SVN revision may have
  # produced one git commit per branch it touched, so restrict the search to
  # the merged branch and keep one commit only; otherwise several sha1s
  # could come back and be rejected by the caller as an invalid parent.
  merge_parent=`git rev-list --all -1 \
    --grep="^ *git-svn-id: .*/$svn_branch_name@$svn_merge_parent [-0-9a-f]*\\$"`
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
}
# create_graft <ref> <merge-parent>
# ---------------------------------
# Add <merge-parent> as an extra parent of the commit designated by <ref> by
# appending a line to $GIT_DIR/info/grafts.  Nothing is written when
# <merge-parent> is already a parent, when an identical graft already
# exists, or in dry-run mode.  Calls fatal when a different graft already
# exists for <ref> or when the grafts file cannot be written.
# Increments $nconverted for each graft actually written.
create_graft()
{
  # --parents will print $1 along with its current parents.
  grafted_commit=`git rev-list --no-walk --parents "$1"`
  rv=$?
  test $rv -eq 0 || fatal "git rev-list returned $rv"
  graft_merge_parent=$2

  case $grafted_commit in #(
    *"$graft_merge_parent"*)
      debug " not grafting commit $1: $graft_merge_parent is already a parent ($grafted_commit)"
      return 0;;
  esac

  graft="$grafted_commit $graft_merge_parent"
  grafts_file="$GIT_DIR/info/grafts"
  # Only look for an existing graft when the file exists: grep on a missing
  # file would print an error for every commit (always so in dry-run mode,
  # where the file is never created).
  if test -f "$grafts_file" \
     && existing_graft=`grep "^$1" "$grafts_file"`; then
    if test x"$existing_graft" != x"$graft"; then
      fatal "$1 is already graft ($existing_graft) \
and the graft is different than what I was going to graft ($graft)"
    fi
    debug " not grafting commit $1: already properly grafted"
    return 0
  fi
  debug " grafting commit $1: add parent $graft_merge_parent"
  $opt_dryrun && return 0
  nconverted=$((nconverted + 1))
  echo >>"$grafts_file" "$graft" \
    || fatal "Failed to add a graft in $GIT_DIR/info/grafts"
}
# rewrite_history
# ---------------
# Make *all* the grafts part of the actual history by running
# `git filter-branch' over every ref.  Does nothing in dry-run mode; calls
# fatal when filter-branch fails.
rewrite_history()
{
  if $opt_dryrun; then
    return 0
  fi
  git filter-branch --parent-filter cat -- --all && return 0
  # Still holds the status of filter-branch at this point.
  fatal "git filter-branch returned $?"
}
# doit <REF>
# ----------
# Find all the merges mentioned in the commit messages reachable from <REF>
# and graft each of them so that it becomes a real Git merge.
# Uses $tmp_buf as scratch storage and increments $nmerge for every commit
# recognised as a merge.  Calls fatal when <REF> is invalid or a git command
# fails.
doit()
{
  refspec=$1
  verb " >> Processing merges in the history of $refspec"

  if ! git rev-list --no-walk "$refspec" >/dev/null; then
    fatal "'$refspec' does not seem to be a valid refspec"
  fi

  # List the candidate commits: those whose log matches $merge_pattern.
  git rev-list --grep="$merge_pattern" "$refspec" >"$tmp_buf" \
    || fatal "git rev-list failed and returned $?"

  while read merge_commit; do
    commit_log=$(git log --no-walk "$merge_commit") \
      || fatal "git log returned $?"
    # Keep only the first line of the log that matches $merge_pattern.
    matched_line=$(echo "$commit_log" | sed "/$merge_pattern/!d;//q")

    # Maybe skip the commit if it matches $exclude_pattern or
    # $log_exclude_pattern (in which case it's not a merge)
    if $has_exclude; then
      if echo "$matched_line" | grep -- "$exclude_pattern" >/dev/null; then
        verb " skipping $merge_commit whose log merge-line is: $matched_line"
        continue
      fi
    fi
    if $has_log_exclude; then
      if echo "$commit_log" | grep -- "$log_exclude_pattern" >/dev/null; then
        verb " skipping $merge_commit whose log is:"
        if $opt_verbose; then
          echo "$commit_log" | sed 's/^/ | /'
        fi
        continue
      fi
    fi

    nmerge=$((nmerge + 1))
    verb " $merge_commit is a merge commit, log says:
| $matched_line"

    find_merge_parent "$merge_commit" "$matched_line"

    case $merge_parent in #(
      unknown)
        warn "could not find the merge parent of $merge_commit"
        continue;; #(
      '' | *[^0-9a-f]*)
        fatal "invalid merge_parent: '$merge_parent'";;
    esac

    create_graft "$merge_commit" "$merge_parent" \
      || fatal "failed to create a graft for commit $merge_commit"
  done <"$tmp_buf"
}
# ------------------ #
# `main' starts here #
# ------------------ #

test -d "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not a directory"
test -w "$TMPDIR" || fatal "TMPDIR='$TMPDIR' is not writable"
# Scratch file shared by the loops below.
tmp_buf=`mktemp "$TMPDIR/$me.XXXXXX"` \
  || fatal "could not create a temporary file in '$TMPDIR'"
# Clean up temp file upon exit.  The trap is single-quoted so that $? is
# read when the trap runs; in double quotes it was expanded when the trap
# was installed, which made the script exit 0 even after a fatal error.
trap 'exit_status=$?; rm -f "$tmp_buf"; exit $exit_status' 0
# Parse the options passed to the script.
# Each opt_* flag holds a command name -- `false' by default, `:' once the
# option has been seen -- so that it can be run directly as a condition.
opt_all=false
opt_debug=false
opt_dryrun=false
opt_remote=false
opt_verbose=false

# Consume options until `--' is met or the arguments run out; anything
# unrecognised goes to `usage'.
until test $# -eq 0
do
  case $1 in #(
    -a | --all)      opt_all=: ;; #(
    -d | --debug)    opt_debug=: ;; #(
    -n | --dry-run)  opt_dryrun=: ;; #(
    -r | --remote)   opt_remote=: ;; #(
    -v | --verbose)  opt_verbose=: ;; #(
    --)
      shift
      break ;; #(
    *)
      usage ;; #(
  esac
  shift
done
# --all: append every local branch to the list of refs to process (the
# positional parameters).
if $opt_all; then
  git for-each-ref --shell --format='ref=%(refname)' refs/heads >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
  # -r: the shell-quoted output may contain backslashes (a quote in a ref
  # name is emitted as '\'') which a plain `read' would strip before eval.
  while read -r line
  do
    eval "$line"
    # `--' keeps `set' from treating a leading `-' or `+' as an option.
    set -- "$@" "$ref"
  done <"$tmp_buf"
fi

# --remote: make sure each remote branch has a local counterpart.
if $opt_remote; then
  git for-each-ref --shell --format='ref=%(refname)' refs/remotes >"$tmp_buf"
  rv=$?
  test $rv -eq 0 || fatal "git for-each-ref failed and returned $rv"
  while read -r line
  do
    eval "$line"
    branch=`basename "$ref"`
    case $branch in #(
      HEAD)        # Skip branches named `HEAD' (which does happen)
        continue;; # because they create ambiguities.
    esac
    # if the local $branch does not already exist, we create one
    exists=`git rev-list --no-walk refs/heads/"$branch" 2>/dev/null`
    if test -z "$exists"; then # the $branch does not locally exist
      verb "creating branch '$branch' from '$ref'"
      git branch "$branch" "$ref" \
        || fatal "could not create branch '$branch' from '$ref'"
    else # there already is a local $branch
      sha1=`git rev-list --no-walk "$ref"`
      # Maybe the existing local $branch is identical to the remote $ref?
      if test "$sha1" = "$exists"; then # OK, local = remote
        verb "branch '$branch' is already properly initialized to '$ref'"
        set -- "$@" "refs/heads/$branch"
      else # KO, local != remote
        warn "there already exists a local branch '$branch'
and it is at $exists whereas the remote branch '$ref'
is at $sha1 so I'm skipping it..."
      fi
    fi
  done <"$tmp_buf"
fi
# No refspec given => work on HEAD
if test -z "$*"; then
  set HEAD
fi

# has_exclude / has_log_exclude hold a command name (`:' or `false')
# telling whether the corresponding exclusion pattern is non-empty.
has_exclude=:
test -n "$exclude_pattern" || has_exclude=false

has_log_exclude=:
test -n "$log_exclude_pattern" || has_log_exclude=false

# Process each requested ref in turn, keeping per-ref and overall counts.
totalmerge=0
totalconverted=0
for refspec in "$@"
do
  nconverted=0
  nmerge=0
  doit "$refspec"
  echo ">>> processed $nconverted/$nmerge merges in $refspec"
  totalmerge=$((totalmerge + nmerge))
  totalconverted=$((totalconverted + nconverted))
done

rewrite_history

echo "Done. Processed $totalconverted/$totalmerge merges"
if test $warnings -ne 0; then
  warn "job completed with $warnings warnings"
fi