2 # Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
4 # Distributed under terms of the GNU General Public License
5 # This program comes with NO WARRANTY.
12 # - use binary search to find from-patch ica. conflict.
13 # - use checkpointing to optimize speed?
14 # - use get --partial ?
21 import xml
.dom
.minidom
23 import gdbm
as dbmodule
27 ################################################################
33 mail_to_name_dict
= {}
38 ################################################################
41 class PullConflict (Exception):
43 class CommandFailed (Exception):
47 sys
.stderr
.write (s
+ '\n')
49 def get_cli_options ():
50 p
= optparse
.OptionParser ()
52 p
.usage
='''darcs2git [OPTIONS] DARCS-REPO'''
53 p
.description
='''Convert darcs repo to git.
55 This tool is a conversion utility for Darcs repositories. It requires
56 a Git version that has git-fast-import. It does not support incremental
59 This tool will import the patches in chronological order, and only creates
60 merges when a resolved conflict is detected.
def update_map (option, opt, value, parser):
    """optparse callback: load EMAIL=NAME pairs from the file VALUE.

    Every non-blank line of the file must have the form EMAIL=NAME; the
    pair is stored in the module-level mail_to_name_dict, overriding
    any earlier entry for the same EMAIL.  OPTION, OPT and PARSER are
    only present because the optparse callback signature requires
    them; they are not used.

    Raises ValueError for a non-blank line without a '='.
    """
    f = open (value)
    try:
        for l in f:
            l = l.strip ()
            if not l:
                ## tolerate blank lines, e.g. a trailing newline at EOF.
                continue

            ## split on the first '=' only, so NAME may contain '='.
            (mail, name) = l.split ('=', 1)
            mail_to_name_dict[mail] = name
    finally:
        ## don't leak the handle, also when a line is malformed.
        f.close ()
69 p
.add_option ('-a', '--authors', action
='callback',
73 help='read a text file, containing EMAIL=NAME lines')
75 p
.add_option ('--checkpoint-frequency', action
='store',
76 dest
='checkpoint_frequency',
79 help='how often should the git importer be synced?')
81 p
.add_option ('-d', '--destination', action
='store',
84 dest
='target_git_repo',
85 help='where to put the resulting Git repo.')
87 p
.add_option ('--verbose', action
='store_true',
90 help='show commands as they are invoked')
92 p
.add_option ('--history-window', action
='store',
93 dest
='history_window',
96 help='Look back this many patches as conflict ancestors.')
98 p
.add_option ('--debug', action
='store_true',
101 help="""add patch numbers to commit messages;
102 don\'t clean conversion repo;
106 options
, args
= p
.parse_args ()
111 options
.basename
= os
.path
.basename (os
.path
.normpath (args
[0])).replace ('.darcs', '')
112 if not options
.target_git_repo
:
113 options
.target_git_repo
= options
.basename
+ '.git'
117 name
= options
.target_git_repo
.replace ('.git', '.log')
118 if name
== options
.target_git_repo
:
121 progress ("Shell log to %s" % name
)
122 log_file
= open (name
, 'w')
124 return (options
, args
)
126 def read_pipe (cmd
, ignore_errors
=False):
128 progress ('pipe %s' % cmd
)
129 pipe
= os
.popen (cmd
)
132 if pipe
.close () and not ignore_errors
:
133 raise CommandFailed ("Pipe failed: %s" % cmd
)
137 def system (c
, ignore_error
=0):
142 log_file
.write ('%s\n' % c
)
145 if os
.system (c
) and not ignore_error
:
146 raise CommandFailed ("Command failed: %s" % c
)
def darcs_date_to_git (x):
    """Convert a darcs patch date to seconds since the epoch.

    X is a timestamp string of the form YYYYMMDDHHMMSS.  The result is
    the number of seconds since 1970-01-01 00:00:00 UTC, formatted as a
    decimal string.

    Raises ValueError if X does not match the expected format.
    """
    ## local import: only this function needs the module.
    import calendar

    t = time.strptime (x, '%Y%m%d%H%M%S')

    ## darcs records this stamp in UTC.  time.mktime () would read the
    ## struct as local time and shift the result by the timezone of
    ## the converting machine; calendar.timegm () reads it as UTC.
    return '%d' % calendar.timegm (t)
152 def darcs_timezone (x
) :
153 time
.strptime (x
, '%a %b %d %H:%M:%S %Z %Y')
158 ################################################################
161 class DarcsConversionRepo
:
162 def __init__ (self
, dir, patches
):
163 self
.dir = os
.path
.abspath (dir)
164 self
.patches
= patches
166 self
._current
_number
= -1
168 self
._inventory
_dict
= None
170 def is_contiguous (self
):
171 if not self
._is
_valid
:
174 darcs_dir
= self
.dir + '/_darcs'
175 if not os
.path
.exists (darcs_dir
):
178 inv
= self
.inventory ()
179 for p
in self
.patches
[:self
._current
_number
+ 1]:
180 if not self
.has_patch (p
):
183 if self
._current
_number
+ 1 != len (inv
.split ('\n[')):
188 def has_patch (self
, p
):
189 assert self
._is
_valid
191 return self
.inventory_dict ().has_key (p
.short_id ())
def pristine_tree (self):
    """Return the path of the _darcs/pristine directory below self.dir."""
    return '/'.join ((self.dir, '_darcs', 'pristine'))
196 def go_back (self
, count
):
197 assert self
._is
_valid
201 progress ('Rewinding %d patches' % count
)
202 system ('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d' % locals ())
203 d
= self
.inventory_dict ()
204 for p
in self
.patches
[self
._current
_number
- count
:self
._current
_number
+1]:
208 system ('rm -rf %s' % self
.dir)
212 system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % locals ())
214 def pull (self
, patch
):
215 id = patch
.attributes
['hash']
216 source_repo
= patch
.dir
219 progress ('Pull patch %d' % patch
.number
)
220 system ('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s ' % locals ())
222 self
._current
_number
= patch
.number
224 def create_fresh (self
):
226 system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
228 self
._is
_valid
= True
229 self
._current
_number
= -1
230 self
._inventory
_dict
= None
232 def inventory (self
):
233 darcs_dir
= self
.dir + '/_darcs'
235 for f
in [darcs_dir
+ '/inventory'] + glob
.glob (darcs_dir
+ '/inventories/*'):
236 i
+= open (f
).read ()
239 def inventory_dict (self
):
240 if type (self
._inventory
_dict
) != type ({}):
241 self
._inventory
_dict
= {}
244 self
._inventory
_dict
[m
.group (1)] = 1
246 re
.sub (r
'\n([^*\n]+\*\*[0-9]+)', note_patch
, self
.inventory ())
247 return self
._inventory
_dict
248 def start_at (self
, num
):
249 progress ('Go to patch %d' % num
)
253 iv
= open (dir + '/_darcs/inventory', 'w')
255 log_file
.write ("# messing with _darcs/inventory")
257 for p
in self
.patches
[:num
+1]:
258 os
.link (p
.filename (), dir + '/_darcs/patches/' + os
.path
.basename (p
.filename ()))
259 iv
.write (p
.header ())
263 system ('cd %(dir)s && darcs repair --quiet' % locals ())
265 self
._current
_number
= num
266 self
._is
_valid
= True
268 def go_from_to (self
, from_patch
, to_patch
):
270 """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
271 PullConflict if conflict is detected
273 This uses the fishy technique of writing the inventory and
274 constructing the pristine tree with 'darcs repair'
276 It might be quicker and/or more correct to wind/rewind the
277 repo with pull and unpull."""
279 valid
= self
.is_contiguous ()
282 where
= 'at %d' % self
._current
_number
284 progress ('Trying %s -> %s %s' % (from_patch
,
287 source
= to_patch
.dir
290 if from_patch
.number
== self
._current
_number
and valid
:
292 elif from_patch
.number
< self
._current
_number
and valid
:
293 self
.go_back (self
._current
_number
- from_patch
.number
)
295 ## go back doesn't work reliably.
296 self
.start_at (from_patch
.number
)
302 success
= 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve' % locals ())
303 except CommandFailed
:
304 self
._is
_valid
= False
305 raise PullConflict ()
308 raise PullConflict ()
312 return 'patch %d' % self
.number
314 def __init__ (self
, xml
, dir):
319 for (nm
, value
) in xml
.attributes
.items():
320 self
.attributes
[nm
] = value
322 # fixme: ugh attributes vs. methods.
323 self
.extract_author ()
324 self
.extract_message ()
328 return '%s**%s' % (self
.attributes
['author'], self
.attributes
['hash'].split ('-')[0])
331 return self
.dir + '/_darcs/patches/' + self
.attributes
['hash']
334 f
= gzip
.open (self
.filename ())
338 lines
= self
.contents ().split ('\n')
341 committer
= lines
[1] + '\n'
342 committer
= re
.sub ('] {\n$', ']\n', committer
)
343 committer
= re
.sub ('] *\n$', ']\n', committer
)
345 if not committer
.endswith (']\n'):
352 header
= name
+ '\n' + committer
356 assert header
[-1] == '\n'
359 def extract_author (self
):
360 mail
= self
.attributes
['author']
362 m
= re
.search ("^(.*) <(.*)>$", mail
)
369 name
= mail_to_name_dict
[mail
]
371 name
= mail
.split ('@')[0]
373 self
.author_name
= name
374 self
.author_mail
= mail
def extract_time (self):
    """Set self.date from the 'date' and 'local_date' patch attributes.

    The value is the result of darcs_date_to_git () on the 'date'
    attribute, a single space, and the result of darcs_timezone () on
    the 'local_date' attribute.
    """
    attrs = self.attributes
    stamp = darcs_date_to_git (attrs['date']) + ' '
    self.date = stamp + darcs_timezone (attrs['local_date'])
380 patch_name
= '(no comment)'
382 name_elt
= self
.xml
.getElementsByTagName ('name')[0]
383 patch_name
= name_elt
.childNodes
[0].data
388 def extract_message (self
):
389 patch_name
= self
.name ()
390 comment_elts
= self
.xml
.getElementsByTagName ('comment')
393 comment
= comment_elts
[0].childNodes
[0].data
395 if self
.attributes
['inverted'] == 'True':
396 patch_name
= 'UNDO: ' + patch_name
398 self
.message
= '%s\n\n%s' % (patch_name
, comment
)
401 patch_name
= self
.name ()
402 if patch_name
.startswith ("TAG "):
404 tag
= re
.sub (r
'\s', '_', tag
).strip ()
405 tag
= re
.sub (r
':', '_', tag
).strip ()
409 def get_darcs_patches (darcs_repo
):
410 progress ('reading patches.')
412 xml_string
= read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo
)
414 dom
= xml
.dom
.minidom
.parseString(xml_string
)
415 xmls
= dom
.documentElement
.getElementsByTagName('patch')
417 patches
= [DarcsPatch (x
, darcs_repo
) for x
in xmls
]
426 ################################################################
430 def __init__ (self
, parent
, darcs_patch
):
432 self
.darcs_patch
= darcs_patch
434 self
.depth
= parent
.depth
+ 1
439 return self
.darcs_patch
.number
441 def parent_patch (self
):
443 return self
.parent
.darcs_patch
447 def common_ancestor (a
, b
):
449 if a
.depth
< b
.depth
:
451 elif a
.depth
> b
.depth
:
def export_checkpoint (gfi):
    """Write a 'checkpoint' command, followed by a blank line, to GFI.

    GFI is any object with a write () method; the script passes the
    pipe it opened to git-fast-import.
    """
    command = 'checkpoint\n\n'
    gfi.write (command)
468 def export_tree (tree
, gfi
):
469 tree
= os
.path
.normpath (tree
)
470 gfi
.write ('deleteall\n')
471 for (root
, dirs
, files
) in os
.walk (tree
):
473 rf
= os
.path
.normpath (os
.path
.join (root
, f
))
474 s
= open (rf
).read ()
475 rf
= rf
.replace (tree
+ '/', '')
477 gfi
.write ('M 644 inline %s\n' % rf
)
478 gfi
.write ('data %d\n%s\n' % (len (s
), s
))
482 def export_commit (repo
, patch
, last_patch
, gfi
):
483 gfi
.write ('commit refs/heads/darcstmp%d\n' % patch
.number
)
484 gfi
.write ('mark :%d\n' % (patch
.number
+ 1))
485 gfi
.write ('committer %s <%s> %s\n' % (patch
.author_name
,
491 msg
+= '\n\n#%d\n' % patch
.number
493 gfi
.write ('data %d\n%s\n' % (len (msg
), msg
))
497 for (n
, p
) in pending_patches
.items ():
498 if repo
.has_patch (p
):
500 del pending_patches
[n
]
504 and git_commits
.has_key (last_patch
.number
)):
505 mergers
= [last_patch
.number
]
508 gfi
.write ('from :%d\n' % (mergers
[0] + 1))
509 for m
in mergers
[1:]:
510 gfi
.write ('merge :%d\n' % (m
+ 1))
512 pending_patches
[patch
.number
] = patch
513 export_tree (repo
.pristine_tree (), gfi
)
518 n
= last_patch
.number
519 git_commits
[patch
.number
] = GitCommit (git_commits
.get (n
, None),
522 def export_pending (gfi
):
523 if len (pending_patches
.items ()) == 1:
524 gfi
.write ('reset refs/heads/master\n')
525 gfi
.write ('from :%d\n\n' % (pending_patches
.values()[0].number
+1))
528 for (n
, p
) in pending_patches
.items ():
529 gfi
.write ('reset refs/heads/master%d\n' % n
)
530 gfi
.write ('from :%d\n\n' % (n
+1))
532 patches
= pending_patches
.values()
534 gfi
.write ('commit refs/heads/master\n')
535 gfi
.write ('committer %s <%s> %s\n' % (patch
.author_name
,
539 gfi
.write ('data %d\n%s\n' % (len(msg
), msg
))
540 gfi
.write ('from :%d\n' % (patch
.number
+ 1))
541 for p
in patches
[1:]:
542 gfi
.write ('merge :%d\n' % (p
.number
+ 1))
545 def export_tag (patch
, gfi
):
546 gfi
.write ('tag %s\n' % patch
.tag_name ())
547 gfi
.write ('from :%d\n' % (patch
.number
+ 1))
548 gfi
.write ('tagger %s <%s> %s\n' % (patch
.author_name
,
551 gfi
.write ('data %d\n%s\n' % (len (patch
.message
),
554 ################################################################
556 def test_conversion (darcs_repo
, git_repo
):
557 gd
= options
.basename
+ '.checkouttmp.git'
558 system ('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % locals ())
559 diff
= read_pipe ('diff --exclude .git -urN %(gd)s %(darcs_repo)s/_darcs/pristine' % locals (), ignore_errors
=True)
560 system ('rm -rf %(gd)s' % locals ())
562 progress ("Conversion introduced changes: %s" % diff
)
565 progress ("Checkout matches pristine darcs tree.")
568 (options
, args
) = get_cli_options ()
570 darcs_repo
= os
.path
.abspath (args
[0])
571 git_repo
= os
.path
.abspath (options
.target_git_repo
)
573 if os
.path
.exists (git_repo
):
574 system ('rm -rf %(git_repo)s' % locals ())
576 system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
577 system ('git --git-dir %(git_repo)s repo-config core.logAllRefUpdates false' % locals ())
579 os
.environ
['GIT_DIR'] = git_repo
581 gfi
= os
.popen ('git-fast-import --quiet', 'w')
583 patches
= get_darcs_patches (darcs_repo
)
584 conv_repo
= DarcsConversionRepo (options
.basename
+ ".tmpdarcs", patches
)
589 combinations
= [(v
, w
) for v
in pending_patches
.values ()
590 for w
in pending_patches
.values ()]
591 candidates
= [common_ancestor (git_commits
[c
[0].number
], git_commits
[c
[1].number
]) for c
in combinations
]
592 candidates
= sorted ([(-a
.depth
, a
) for a
in candidates
])
593 for (depth
, c
) in candidates
:
596 conv_repo
.go_from_to (q
, p
)
599 parent_number
= q
.number
600 progress ('Found existing common parent as predecessor')
606 ## no branches found where we could attach.
607 ## try previous commits one by one.
609 parent_number
= p
.number
- 2
611 if parent_number
>= 0:
612 parent_patch
= patches
[parent_number
]
615 conv_repo
.go_from_to (parent_patch
, p
)
618 ## simplistic, may not be enough.
619 progress ('conflict, going one back')
622 if parent_number
< 0:
625 if (options
.history_window
626 and parent_number
< p
.number
- options
.history_window
):
631 if parent_number
>= 0 or p
.number
== 0:
632 progress ('Export %d -> %d (total %d)' % (parent_number
,
633 p
.number
, len (patches
)))
634 export_commit (conv_repo
, p
, parent_patch
, gfi
)
638 if p
.number
% options
.checkpoint_frequency
== 0:
639 export_checkpoint (gfi
)
641 progress ("Can't import patch %d, need conflict resolution patch?" % p
.number
)
646 system ('rm %(git_repo)s/refs/heads/darcstmp*' % locals ())
647 test_conversion (darcs_repo
, git_repo
)
649 if not options
.debug
: