ignorance
[darcs2git.git] / darcs2git.py
blob2361dc3fd8179ee40d0622c4d624962da7deb64d
2 # Copyright (c) 2007 Han-Wen Nienhuys <hanwen@xs4all.nl>
4 # Distributed under terms of the GNU General Public License
5 # This program comes with NO WARRANTY.
9 # TODO:
10 # - time zones
11 # - file modes
12 # - use binary search to find from-patch in case of a conflict.
13 # - use checkpointing to optimize speed?
14 # - use get --partial ?
17 import glob
18 import os
19 import sys
20 import time
21 import xml.dom.minidom
22 import re
23 import gdbm as dbmodule
24 import gzip
25 import optparse
27 ################################################################
28 # globals
# Shell-command log; stays None unless get_cli_options () opens one
# for --debug.
log_file = None

# Parsed command-line options, assigned by get_cli_options ().
options = None

# EMAIL -> NAME, filled from the file given with --authors.
mail_to_name_dict = dict ()

# Patch number -> DarcsPatch for exported commits that no later commit
# has picked up as a parent yet.
pending_patches = dict ()

# Patch number -> GitCommit for every commit exported so far.
git_commits = dict ()

used_tags = dict ()
38 ################################################################
39 # utils
class PullConflict(Exception):
    """Raised when pulling a darcs patch fails or leaves conflicts."""
class CommandFailed(Exception):
    """Raised when an external shell command reports a failure status."""
def progress(s):
    """Write the status message S, terminated by a newline, to stderr."""
    line = s + '\n'
    sys.stderr.write(line)
def get_cli_options():
    """Parse the command line and publish the result in the global `options`.

    Besides the declared options, two derived attributes are set:
    `options.basename` (last path component of the darcs repo with
    '.darcs' removed) and, when -d was not given, a default
    `options.target_git_repo` of BASENAME + '.git'.  With --debug the
    global `log_file` is opened for the shell-command log.

    Prints the help text and exits with status 2 when no darcs
    repository argument is given.

    Returns the (options, args) pair produced by optparse.
    """
    global options
    global log_file

    p = optparse.OptionParser()

    p.usage = '''darcs2git [OPTIONS] DARCS-REPO'''
    p.description = '''Convert darcs repo to git.

This tool is a conversion utility for Darcs repositories. It requires
a Git version that has git-fast-import. It does not support incremental
updating.

This tool will import the patches in chronological order, and only creates
merges when a resolved conflict is detected.

'''

    def update_map(option, opt, value, parser):
        # optparse callback for --authors: merge the EMAIL=NAME lines of
        # the file VALUE into mail_to_name_dict.
        f = open(value)
        try:
            for l in f:
                l = l.strip()
                if not l:
                    # tolerate blank lines
                    continue
                if '=' not in l:
                    raise optparse.OptionValueError(
                        '%s: expected EMAIL=NAME, got %r' % (value, l))
                # split on the first '=' only, so names may contain '='
                (mail, name) = l.split('=', 1)
                mail_to_name_dict[mail] = name
        finally:
            f.close()

    p.add_option('-a', '--authors', action='callback',
                 callback=update_map,
                 type='string',
                 nargs=1,
                 help='read a text file, containing EMAIL=NAME lines')

    p.add_option('--checkpoint-frequency', action='store',
                 dest='checkpoint_frequency',
                 type='int',
                 default=100,
                 help='how often should the git importer be synced?')

    p.add_option('-d', '--destination', action='store',
                 type='string',
                 default='',
                 dest='target_git_repo',
                 help='where to put the resulting Git repo.')

    p.add_option('--verbose', action='store_true',
                 dest='verbose',
                 default=False,
                 help='show commands as they are invoked')

    p.add_option('--history-window', action='store',
                 dest='history_window',
                 type='int',
                 default=0,
                 help='Look back this many patches as conflict ancestors.')

    p.add_option('--debug', action='store_true',
                 dest='debug',
                 default=False,
                 help="""add patch numbers to commit messages;
don\'t clean conversion repo;
test end result.""")

    options, args = p.parse_args()
    if not args:
        p.print_help()
        sys.exit(2)

    options.basename = os.path.basename(os.path.normpath(args[0])).replace('.darcs', '')
    if not options.target_git_repo:
        options.target_git_repo = options.basename + '.git'

    if options.debug:
        name = options.target_git_repo.replace('.git', '.log')
        if name == options.target_git_repo:
            # target name had no '.git' to replace
            name += '.log'

        progress("Shell log to %s" % name)
        log_file = open(name, 'w')

    return (options, args)
def read_pipe(cmd, ignore_errors=False):
    """Run CMD through os.popen and return everything it wrote to stdout.

    The command is echoed via progress () when options.verbose is set.
    Raises CommandFailed if closing the pipe reports a failure status,
    unless IGNORE_ERRORS is true.
    """
    if options.verbose:
        progress('pipe %s' % cmd)

    pipe = os.popen(cmd)
    output = pipe.read()

    # close () yields a true value only when the command failed.
    status = pipe.close()
    if status and not ignore_errors:
        raise CommandFailed("Pipe failed: %s" % cmd)

    return output
def system(c, ignore_error=0):
    """Run the shell command C with os.system.

    The command is echoed via progress () when options.verbose is set and
    appended to the shell log when one is open.  Raises CommandFailed on a
    non-zero status unless IGNORE_ERROR is true.
    """
    if options.verbose:
        progress(c)

    if log_file:
        log_file.write('%s\n' % c)
        log_file.flush()

    status = os.system(c)
    if status and not ignore_error:
        raise CommandFailed("Command failed: %s" % c)
def darcs_date_to_git(x):
    """Convert a darcs patch date to seconds since the epoch, as a string.

    X has the form YYYYMMDDHHMMSS.  The result is used as the timestamp
    part of the date handed to git-fast-import.
    """
    # Function-scope import: this is the only user of the module.
    import calendar

    t = time.strptime(x, '%Y%m%d%H%M%S')
    # NOTE(review): the `date` attribute of `darcs changes --xml` is taken
    # to be UTC -- confirm against darcs.  time.mktime would read it as
    # local time and shift the epoch by the converting machine's UTC offset.
    return '%d' % calendar.timegm(t)
def darcs_timezone(x):
    """Return the timezone offset string to pair with a patch timestamp.

    X is the patch's local date.  It is parsed (so malformed input raises
    ValueError) but the parsed value is not used yet: the offset returned
    is always the constant "+0100".
    """
    local_date_format = '%a %b %d %H:%M:%S %Z %Y'
    time.strptime(x, local_date_format)

    # todo
    return "+0100"
158 ################################################################
159 # darcs
class DarcsConversionRepo:
    """Scratch darcs repository that is wound to a given patch set.

    PATCHES is the full, numbered list of patches of the source
    repository.  The scratch repository lives in DIR and is repeatedly
    rebuilt or rewound so that individual patches can be pulled into it
    and its pristine tree exported.
    """

    def __init__(self, dir, patches):
        self.dir = os.path.abspath(dir)
        self.patches = patches

        # Number of the last patch pulled / written; -1 for none.
        self._current_number = -1
        # Starts out as a true value other than True; create_fresh () and
        # start_at () set True, a failed pull sets False.
        self._is_valid = -1
        # Cache for inventory_dict (); None until first built.
        self._inventory_dict = None

    def is_contiguous(self):
        """Tell whether the repo holds exactly patches 0 .. current."""
        if not self._is_valid:
            return False

        darcs_dir = self.dir + '/_darcs'
        if not os.path.exists(darcs_dir):
            return False

        inv = self.inventory()
        for p in self.patches[:self._current_number + 1]:
            if not self.has_patch(p):
                return False

        # Every inventory entry opens with '[' on a line of its own, so
        # splitting on '\n[' yields one piece per recorded patch.
        if self._current_number + 1 != len(inv.split('\n[')):
            return False

        return True

    def has_patch(self, p):
        """Tell whether patch P is listed in the (cached) inventory."""
        assert self._is_valid

        return p.short_id() in self.inventory_dict()

    def pristine_tree(self):
        """Return the path of the repository's pristine tree."""
        return self.dir + '/_darcs/pristine'

    def go_back(self, count):
        """Obliterate the last COUNT patches from the repository."""
        assert self._is_valid

        self.checkout()
        progress('Rewinding %d patches' % count)
        system('cd %(dir)s && echo ay|darcs obliterate --ignore-times --last %(count)d'
               % {'dir': self.dir, 'count': count})
        d = self.inventory_dict()
        # NOTE(review): this slice starts one patch before the COUNT
        # obliterated ones and self._current_number is not updated here;
        # left as found -- confirm whether that is intended.
        for p in self.patches[self._current_number - count:self._current_number + 1]:
            del d[p.short_id()]

    def clean(self):
        """Remove the repository directory."""
        system('rm -rf %s' % self.dir)

    def checkout(self):
        """Copy the pristine tree over the working directory with rsync."""
        system('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % {'dir': self.dir})

    def pull(self, patch):
        """Pull PATCH (matched by hash) from its source repository."""
        args = {'dir': self.dir,
                'id': patch.attributes['hash'],
                'source_repo': patch.dir}

        progress('Pull patch %d' % patch.number)
        system('cd %(dir)s && darcs pull --ignore-times --quiet --all --match "hash %(id)s" %(source_repo)s ' % args)

        self._current_number = patch.number

    def create_fresh(self):
        """Replace the directory by a newly initialised, empty darcs repo."""
        system('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s'
               % {'dir': self.dir})
        self._is_valid = True
        self._current_number = -1
        self._inventory_dict = None

    def inventory(self):
        """Return the text of _darcs/inventory plus all _darcs/inventories/*."""
        darcs_dir = self.dir + '/_darcs'
        chunks = []
        for f in [darcs_dir + '/inventory'] + glob.glob(darcs_dir + '/inventories/*'):
            fh = open(f)
            try:
                chunks.append(fh.read())
            finally:
                fh.close()
        return ''.join(chunks)

    def inventory_dict(self):
        """Return a dict keyed by the AUTHOR**DATE ids found in the inventory.

        The dict is built on first use and then cached until
        create_fresh () discards it.
        """
        if not isinstance(self._inventory_dict, dict):
            self._inventory_dict = {}
            for m in re.finditer(r'\n([^*\n]+\*\*[0-9]+)', self.inventory()):
                self._inventory_dict[m.group(1)] = 1
        return self._inventory_dict

    def start_at(self, num):
        """Rebuild the repository so that it holds patches 0 .. NUM.

        The patch files are hard-linked in, the inventory is written by
        hand and 'darcs repair' reconstructs the pristine tree.
        """
        progress('Go to patch %d' % num)

        self.create_fresh()
        dir = self.dir
        iv = open(dir + '/_darcs/inventory', 'w')
        try:
            if log_file:
                # Newline-terminated so the comment does not swallow the
                # next command written to the shell log.
                log_file.write("# messing with _darcs/inventory\n")

            for p in self.patches[:num + 1]:
                os.link(p.filename(), dir + '/_darcs/patches/' + os.path.basename(p.filename()))
                iv.write(p.header())
        finally:
            iv.close()

        system('cd %(dir)s && darcs repair --quiet' % {'dir': dir})
        self.checkout()
        self._current_number = num
        self._is_valid = True

    def go_from_to(self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH.  Raise
        PullConflict if conflict is detected

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'

        It might be quicker and/or more correct to wind/rewind the
        repo with pull and unpull."""

        valid = self.is_contiguous()
        where = ''
        if valid:
            where = 'at %d' % self._current_number

        progress('Trying %s -> %s %s' % (from_patch,
                                         to_patch, where))

        if from_patch:
            if from_patch.number == self._current_number and valid:
                pass
            elif from_patch.number < self._current_number and valid:
                self.go_back(self._current_number - from_patch.number)
            else:
                ## go back doesn't work reliably.
                self.start_at(from_patch.number)
        else:
            self.create_fresh()

        try:
            self.pull(to_patch)
            success = 'No conflicts to resolve' in read_pipe(
                'cd %(dir)s && echo y|darcs resolve' % {'dir': self.dir})
        except CommandFailed:
            self._is_valid = False
            raise PullConflict()

        if not success:
            raise PullConflict()
class DarcsPatch:
    """One patch of the source repository.

    Built from a <patch> element of 'darcs changes --xml' and the path of
    the repository holding the patch file.  After construction the
    attributes author_name, author_mail, message and date are available;
    `number` stays -1 until the caller assigns it.
    """

    def __repr__(self):
        return 'patch %d' % self.number

    def __init__(self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}
        for (nm, value) in xml.attributes.items():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author()
        self.extract_message()
        self.extract_time()

    def short_id(self):
        """Return AUTHOR**DATE, the id as it appears in a darcs inventory."""
        return '%s**%s' % (self.attributes['author'], self.attributes['hash'].split('-')[0])

    def filename(self):
        """Return the path of the patch file in the source repository."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents(self):
        """Return the gunzipped contents of the patch file."""
        f = gzip.open(self.filename())
        try:
            return f.read()
        finally:
            f.close()

    def header(self):
        """Return the inventory entry for this patch.

        That is the name line, the author/date line and, when present, the
        long comment up to and including its closing ']'.  The result
        always ends in a newline.
        """
        lines = self.contents().split('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        # Drop whatever follows the closing bracket (' {' or spaces).
        committer = re.sub('] {\n$', ']\n', committer)
        committer = re.sub('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith(']\n'):
            # The header continues with a long comment.
            for l in lines[2:]:
                # startswith () also copes with an empty line
                if l.startswith(']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name + '\n' + committer
        if comment:
            header += comment

        assert header[-1] == '\n'
        return header

    def extract_author(self):
        """Set author_name and author_mail from the 'author' attribute.

        'NAME <MAIL>' is split directly.  Otherwise the attribute is taken
        as the mail address and the name comes from mail_to_name_dict,
        falling back to the part before '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search("^(.*) <(.*)>$", mail)

        if m:
            name = m.group(1)
            mail = m.group(2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time(self):
        """Set `date` to 'TIMESTAMP TIMEZONE' for git-fast-import."""
        self.date = darcs_date_to_git(self.attributes['date']) + ' ' + darcs_timezone(self.attributes['local_date'])

    def name(self):
        """Return the text of the <name> element, or '(no comment)'."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message(self):
        """Set `message` to the patch name, a blank line and the comment.

        Names of inverted patches are prefixed with 'UNDO: '.
        """
        patch_name = self.name()
        comment_elts = self.xml.getElementsByTagName('comment')
        comment = ''
        # An empty <comment/> element has no child text node.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes['inverted'] == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name(self):
        """Return the tag name if this patch is a darcs tag, else ''.

        Whitespace and ':' in the tag name are replaced by '_'.
        """
        patch_name = self.name()
        if patch_name.startswith("TAG "):
            return re.sub(r'[\s:]', '_', patch_name[4:])
        return ''
def get_darcs_patches(darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch objects.

    The list follows the order of 'darcs changes --reverse'; each patch
    gets its list index as `number`.
    """
    progress('reading patches.')

    xml_string = read_pipe('darcs changes --xml --reverse --repo ' + darcs_repo)
    dom = xml.dom.minidom.parseString(xml_string)

    patches = [DarcsPatch(node, darcs_repo)
               for node in dom.documentElement.getElementsByTagName('patch')]

    for (index, patch) in enumerate(patches):
        patch.number = index

    return patches
426 ################################################################
427 # GIT export
class GitCommit:
    """Node of the exported commit tree.

    Holds the parent GitCommit (or None for a root), the darcs patch the
    commit was made from, and the distance to the root in `depth`.
    """

    def __init__(self, parent, darcs_patch):
        self.parent = parent
        self.darcs_patch = darcs_patch
        # A root has depth 0; every child is one deeper than its parent.
        self.depth = 0
        if parent:
            self.depth = parent.depth + 1

    def number(self):
        """Return the number of the underlying darcs patch."""
        return self.darcs_patch.number

    def parent_patch(self):
        """Return the parent's darcs patch, or None for a root commit."""
        if not self.parent:
            return None
        return self.parent.darcs_patch
def common_ancestor(a, b):
    """Return the deepest commit that is an ancestor of both A and B.

    A commit counts as its own ancestor.  Returns None when the two
    commits share no ancestor.
    """
    # Lift the deeper of the two until both sit at the same depth.
    while a.depth != b.depth:
        if a.depth < b.depth:
            b = b.parent
        else:
            a = a.parent

    # Walk up in lockstep until the chains meet or one runs out.
    while a and b:
        if a == b:
            return a
        a, b = a.parent, b.parent

    return None
def export_checkpoint(gfi):
    """Write a 'checkpoint' command, followed by a blank line, to GFI."""
    command = 'checkpoint\n' '\n'
    gfi.write(command)
def export_tree(tree, gfi):
    """Write the complete contents of directory TREE to GFI.

    Emits 'deleteall', then one inline 'M 644' entry per file found under
    TREE (named relative to TREE), then a terminating blank line.
    """
    tree = os.path.normpath(tree)
    prefix = tree + '/'
    gfi.write('deleteall\n')
    for (root, dirs, files) in os.walk(tree):
        for f in files:
            rf = os.path.normpath(os.path.join(root, f))
            fh = open(rf)
            try:
                s = fh.read()
            finally:
                fh.close()

            # Strip the leading TREE/ only; a plain str.replace would also
            # remove later occurrences of the same text in the path.
            if rf.startswith(prefix):
                rf = rf[len(prefix):]

            gfi.write('M 644 inline %s\n' % rf)
            gfi.write('data %d\n%s\n' % (len(s), s))
    gfi.write('\n')
def export_commit(repo, patch, last_patch, gfi):
    """Write PATCH as a commit on its own temporary branch to GFI.

    The commit gets mark PATCH.number + 1 and the pristine tree of REPO as
    content.  Its parents are all pending patches that REPO contains; if
    there are none, the already exported LAST_PATCH is used.  Those
    pending patches are dropped from pending_patches, PATCH is added to
    it, and the commit is recorded in git_commits.
    """
    gfi.write('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write('mark :%d\n' % (patch.number + 1))
    committer = (patch.author_name, patch.author_mail, patch.date)
    gfi.write('committer %s <%s> %s\n' % committer)

    msg = patch.message
    if options.debug:
        msg += '\n\n#%d\n' % patch.number

    gfi.write('data %d\n%s\n' % (len(msg), msg))

    # Pending patches already contained in the repo become parents.
    mergers = [n for (n, p) in pending_patches.items() if repo.has_patch(p)]
    for n in mergers:
        del pending_patches[n]

    if not mergers and last_patch and last_patch.number in git_commits:
        mergers = [last_patch.number]

    if mergers:
        gfi.write('from :%d\n' % (mergers[0] + 1))
        for m in mergers[1:]:
            gfi.write('merge :%d\n' % (m + 1))

    pending_patches[patch.number] = patch
    export_tree(repo.pristine_tree(), gfi)

    # -1 is the lookup key used when there is no last patch.
    parent_key = -1
    if last_patch:
        parent_key = last_patch.number
    parent_commit = git_commits.get(parent_key, None)
    git_commits[patch.number] = GitCommit(parent_commit, patch)
def export_pending(gfi):
    """Point refs/heads/master at the result of the conversion.

    With a single pending patch, master is reset to its commit.  With
    several, every pending commit gets a branch refs/heads/masterN and
    master becomes a 'tie together' merge of all of them, using the
    author and date of the first one.  With none, nothing is written.
    """
    patches = list(pending_patches.values())
    if not patches:
        # Nothing was exported, so there is no commit to point master at.
        return

    if len(patches) == 1:
        gfi.write('reset refs/heads/master\n')
        gfi.write('from :%d\n\n' % (patches[0].number + 1))
        return

    for (n, p) in list(pending_patches.items()):
        gfi.write('reset refs/heads/master%d\n' % n)
        gfi.write('from :%d\n\n' % (n + 1))

    patch = patches[0]
    gfi.write('commit refs/heads/master\n')
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                          patch.author_mail,
                                          patch.date))
    msg = 'tie together'
    gfi.write('data %d\n%s\n' % (len(msg), msg))
    gfi.write('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write('merge :%d\n' % (p.number + 1))
    gfi.write('\n')
def export_tag(patch, gfi):
    """Write a 'tag' command for PATCH to GFI.

    The tag is named PATCH.tag_name (), points at the mark of PATCH's
    commit and carries the patch message as its annotation.
    """
    tagger = (patch.author_name, patch.author_mail, patch.date)
    message = patch.message
    commands = (
        'tag %s\n' % patch.tag_name(),
        'from :%d\n' % (patch.number + 1),
        'tagger %s <%s> %s\n' % tagger,
        'data %d\n%s\n' % (len(message), message),
    )
    for command in commands:
        gfi.write(command)
554 ################################################################
555 # main.
def test_conversion(darcs_repo, git_repo):
    """Check that a clone of GIT_REPO equals the pristine tree of DARCS_REPO.

    The clone is made in a temporary directory, compared with 'diff'
    (ignoring .git) and removed again.  Any difference is reported and
    ends the program with exit status 1.
    """
    gd = options.basename + '.checkouttmp.git'
    names = {'gd': gd, 'git_repo': git_repo, 'darcs_repo': darcs_repo}

    system('rm -rf %(gd)s && git clone %(git_repo)s %(gd)s' % names)
    # diff exits non-zero when the trees differ, hence ignore_errors.
    diff = read_pipe('diff --exclude .git -urN %(gd)s %(darcs_repo)s/_darcs/pristine' % names, ignore_errors=True)
    system('rm -rf %(gd)s' % names)

    if not diff:
        progress("Checkout matches pristine darcs tree.")
        return

    progress("Conversion introduced changes: %s" % diff)
    sys.exit(1)
def main():
    """Convert the darcs repository named on the command line to git.

    A bare git repository is (re)created at the target location and fed
    through git-fast-import.  For every darcs patch a parent is looked for
    on top of which the patch pulls without conflict: first among the
    common ancestors of the pending heads, then among the preceding
    patches, walking backwards.  Patches for which none is found are
    reported and skipped.

    Raises CommandFailed when an external command, including
    git-fast-import itself, fails.
    """
    (options, args) = get_cli_options()

    darcs_repo = os.path.abspath(args[0])
    git_repo = os.path.abspath(options.target_git_repo)

    if os.path.exists(git_repo):
        system('rm -rf %(git_repo)s' % locals())

    system('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals())
    system('git --git-dir %(git_repo)s repo-config core.logAllRefUpdates false' % locals())

    os.environ['GIT_DIR'] = git_repo

    gfi = os.popen('git-fast-import --quiet', 'w')

    patches = get_darcs_patches(darcs_repo)
    conv_repo = DarcsConversionRepo(options.basename + ".tmpdarcs", patches)
    for p in patches:
        parent_patch = None
        parent_number = -1

        # Common ancestors of every pair of pending heads, deepest first.
        heads = [git_commits[q.number] for q in pending_patches.values()]
        candidates = [common_ancestor(v, w) for v in heads for w in heads]
        # Heads without a shared ancestor yield None; skip those.
        candidates = [c for c in candidates if c is not None]
        # Sort on depth alone so ties never compare commit objects.
        candidates.sort(key=lambda c: -c.depth)
        for c in candidates:
            q = c.darcs_patch
            try:
                conv_repo.go_from_to(q, p)

                parent_patch = q
                parent_number = q.number
                progress('Found existing common parent as predecessor')
                break

            except PullConflict:
                pass

        ## no branches found where we could attach.
        ## try previous commits one by one.
        if not parent_patch:
            parent_number = p.number - 2
            while 1:
                if parent_number >= 0:
                    parent_patch = patches[parent_number]

                try:
                    conv_repo.go_from_to(parent_patch, p)
                    break
                except PullConflict:
                    ## simplistic, may not be enough.
                    progress('conflict, going one back')
                    parent_number -= 1

                    if parent_number < 0:
                        break

                    if (options.history_window
                        and parent_number < p.number - options.history_window):

                        # -2 marks "gave up": it fails the export test below.
                        parent_number = -2
                        break

        if parent_number >= 0 or p.number == 0:
            progress('Export %d -> %d (total %d)' % (parent_number,
                                                     p.number, len(patches)))
            export_commit(conv_repo, p, parent_patch, gfi)
            if p.tag_name():
                export_tag(p, gfi)

            if p.number % options.checkpoint_frequency == 0:
                export_checkpoint(gfi)
        else:
            progress("Can't import patch %d, need conflict resolution patch?" % p.number)

    export_pending(gfi)
    # close () yields a true value when git-fast-import failed.
    if gfi.close():
        raise CommandFailed("Command failed: git-fast-import --quiet")

    system('rm %(git_repo)s/refs/heads/darcstmp*' % locals())
    test_conversion(darcs_repo, git_repo)

    if not options.debug:
        conv_repo.clean()
# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()