remote-bzr: fixes for branch diverge
[alt-git.git] / contrib / remote-helpers / git-remote-bzr
blobbf254a0787fab6e85fcb8289d0efcae704d51149
#!/usr/bin/env python
#
# Copyright (c) 2012 Felipe Contreras
#
# Just copy to your ~/bin, or anywhere in your $PATH.
# Then you can clone with:
# % git clone bzr::/path/to/bzr/repo/or/url
#
# For example:
# % git clone bzr::$HOME/myrepo
# or
# % git clone bzr::lp:myrepo
#
17 import sys
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
31 import sys
32 import os
33 import json
34 import re
35 import StringIO
36 import atexit, shutil, hashlib, urlparse, subprocess
# Patterns for user/ident strings.  Raw string literals keep the regex
# escapes (\w, \d) from being interpreted as string escapes.

# Leading run of characters up to the first '<' or '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by '<mail>', anchored at both ends.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# '<word> [name] <mail> <digits> <+/-digits>', e.g. a 'committer ...' line.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is %-formatted with args only when args are supplied.  Callers that
    pass an already formatted message (possibly containing a literal '%')
    therefore get it printed verbatim instead of triggering a formatting
    error before anything is reported.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr.

    msg is %-formatted with args only when args are supplied, so a plain
    message containing a literal '%' is printed verbatim.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gittz(tz):
    """Format a UTC offset given in seconds as a '+HHMM' / '-HHMM' string.

    The sign is handled separately from the magnitude: applying floor
    division and modulo directly to a negative offset that is not a whole
    number of hours (e.g. -12600 s) would yield '-0430' instead of '-0330'.
    """
    sign = '-' if tz < 0 else '+'
    hours, rest = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, rest // 60)
class Marks:
    """Bookkeeping of marks and branch tips, persisted as JSON.

    The file at `path` holds a dict with the keys 'tips', 'marks' and
    'last-mark'.  The reverse mapping (mark -> revision) is not stored; it
    is rebuilt from 'marks' when the file is loaded.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}       # branch name -> tip revision
        self.marks = {}      # revision -> mark
        self.rev_marks = {}  # mark -> revision
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the state from self.path; do nothing if the file is absent."""
        if not os.path.exists(self.path):
            return

        # 'with' guarantees the handle is closed even if parsing fails.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        self.rev_marks.update((mark, rev) for rev, mark in self.marks.items())

    def dict(self):
        """Return the persistable state as a plain dict."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON, closing the file afterwards."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of `rev`; KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision registered for `mark`; KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark for `rev` and return it.

        Only the revision -> mark direction is recorded here.
        """
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        return self.last_mark

    def is_marked(self, rev):
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Register an externally chosen `mark` for `rev` in both directions."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the stored tip of `branch`, or None when there is none."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    `line` always holds the current (stripped) input line; the helper
    methods interpret that line.  `repo` is carried along for the command
    handlers and is not used by the parser itself.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield the current line, then following lines, until `separator`."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # a block of commands is terminated by an empty line
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an ident line into (committer, timestamp, tz_seconds).

        Returns None when the line does not match RAW_AUTHOR_RE.  The
        '+HHMM'/'-HHMM' zone is converted from its magnitude with the sign
        applied afterwards; flooring the signed number would turn '-0330'
        into -10200 seconds instead of -12600.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        sign = -1 if tz[0] == '-' else 1
        hhmm = int(tz[1:])
        offset = sign * ((hhmm // 100) * 3600 + (hhmm % 100) * 60)
        return (committer, int(date), offset)
def rev_to_mark(rev):
    """Look up the mark recorded for revision `rev` in the global marks."""
    # read-only access to the module-level `marks`
    return marks.from_rev(rev)
def mark_to_rev(mark):
    """Look up the revision recorded for `mark` in the global marks."""
    # read-only access to the module-level `marks`
    return marks.to_rev(mark)
def fixup_user(user):
    """Normalise a user string to the 'Name <mail>' form.

    Double quotes are dropped.  A string of the form '[name] <mail>' is
    split with AUTHOR_RE; otherwise the leading text matched by NAME_RE is
    taken as the name.  Whatever part cannot be determined is replaced by
    a placeholder ('unknown' for the name, 'Unknown' for the mail) so the
    literal text 'None' never ends up in the result.
    """
    name = mail = None
    user = user.replace('"', '')

    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    if not mail:
        mail = 'Unknown'

    return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
    """Collect the paths that differ between tree `cur` and tree `prev`.

    Returns (modified, removed): `modified` maps UTF-8 encoded paths to
    file ids for added, modified and renamed-to entries; `removed` maps
    UTF-8 encoded paths of removed and renamed-from entries to None.
    """
    delta = cur.changes_from(prev)
    modified = {}
    removed = {}

    def utf8(text):
        return text.encode('utf-8')

    for path, fid, _kind in delta.added:
        modified[utf8(path)] = fid

    for path, _fid, _kind in delta.removed:
        removed[utf8(path)] = None

    for path, fid, _kind, _mod, _ in delta.modified:
        modified[utf8(path)] = fid

    for oldpath, newpath, fid, kind, _mod, _ in delta.renamed:
        removed[utf8(oldpath)] = None
        if kind != 'directory':
            modified[utf8(newpath)] = fid
            continue
        # a renamed directory: every non-directory entry below the new
        # location is reported under its new path
        listing = cur.list_files(from_dir=newpath, recursive=True)
        for sub_path, _file_class, sub_kind, sub_fid, _entry in listing:
            if sub_kind != 'directory':
                modified[utf8(newpath + '/' + sub_path)] = sub_fid

    return modified, removed
def export_files(tree, files):
    """Print a blob for each entry of `files` not exported before.

    `files` maps paths to file ids of `tree`.  Returns a list of
    (mode, mark, path) tuples for the non-directory entries.  File blobs
    are remembered in the global `filenodes` by their sha1 so the same
    content is printed only once.
    """
    exported = []
    for path, fid in files.iteritems():
        kind = tree.kind(fid)
        sha = tree.get_file_sha1(fid)

        if kind == 'symlink':
            data = tree.get_symlink_target(fid)
            mode = '120000'
        elif kind == 'file':
            mode = '100755' if tree.is_executable(fid) else '100644'

            # is the blob already exported?
            if sha in filenodes:
                exported.append((mode, filenodes[sha], path))
                continue

            data = tree.get_file_text(fid)
        elif kind == 'directory':
            continue
        else:
            die("Unhandled kind '%s' for path '%s'" % (kind, path))

        mark = marks.next_mark()
        filenodes[sha] = mark

        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(data))
        print(data)

        exported.append((mode, mark, path))

    return exported
def export_branch(branch, name):
    """Print the not yet marked revisions of `branch` as a commit stream.

    The stream targets the ref '<prefix>/heads/<name>'.  Revisions are
    taken from branch.iter_merge_sorted_revisions(), starting after the tip
    stored for `name`; those that already have a mark are skipped.
    Afterwards the ref is reset to the branch's last revision and that
    revision is stored as the new tip.

    The repository read lock is released in a 'finally' clause so that an
    error while exporting does not leave the repository locked.
    """
    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        count = 0

        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]

        for revid in revs:

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            timestamp = rev.timestamp
            tz = rev.timezone
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), timestamp, gittz(tz))
            authors = rev.get_apparent_authors()
            if authors:
                author = authors[0].encode('utf-8')
                author = "%s %u %s" % (fixup_user(author), timestamp, gittz(tz))
            else:
                author = committer
            msg = rev.message.encode('utf-8')

            msg += '\n'

            # file changes are computed against the first parent only
            if len(parents) == 0:
                parent = bzrlib.revision.NULL_REVISION
            else:
                parent = parents[0]

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            # blobs have to be printed before the commit that uses them
            modified_final = export_files(cur_tree, modified)

            if len(parents) == 0:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            for f in removed:
                print("D %s" % (f,))
            for f in modified_final:
                print("M %s :%u %s" % f)
            print("")

            count += 1
            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print("")

    marks.set_tip(name, revid)
def export_tag(repo, name):
    """Print a reset of '<prefix>/tags/<name>' to the tagged revision's mark.

    The revision is taken from the global `tags` dict; `repo` is unused.
    """
    tag_ref = '%s/tags/%s' % (prefix, name)
    print("reset %s" % tag_ref)
    print("from :%u" % rev_to_mark(tags[name]))
    print("")
def do_import(parser):
    """Answer a batch of 'import <ref>' lines with an export stream.

    Prints the feature header (including the 'marks-git' file below the
    global `dirname`), exports every requested branch or tag ref, and
    finishes the stream with 'done'.
    """
    branch = parser.repo
    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        # marks can only be imported once a previous run has written them
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    print("feature force")
    sys.stdout.flush()

    heads = 'refs/heads/'
    tag_refs = 'refs/tags/'
    while parser.check('import'):
        ref = parser[1]
        if ref.startswith(heads):
            export_branch(branch, ref[len(heads):])
        elif ref.startswith(tag_refs):
            export_tag(branch, ref[len(tag_refs):])
        parser.next()

    print('done')

    sys.stdout.flush()
def parse_blob(parser):
    """Consume a blob command and keep its payload in `blob_marks`.

    Expects the current line to be the blob command, followed by a mark
    line and a data line.
    """
    parser.next()                           # move to the mark line
    mark = parser.get_mark()
    parser.next()                           # move to the data line
    blob_marks[mark] = parser.get_data()
    parser.next()                           # step past the payload
class CustomTree():
    """Minimal tree object describing one incoming commit.

    It is handed to the commit builder in parse_commit(), which uses
    last_revision(), the result of iter_changes() and the file accessors.

    Attributes:
      base_id    -- first parent revision id, or NULL_REVISION
      base_files -- path -> [file_id, mark] of the base revision
      files      -- path -> [file_id, mark] of this revision (also stored
                    in the global files_cache under `revid`)
      rev_files  -- file_id -> [path, mark], the reverse of `files`
      updates    -- file_id -> change description taken from `files` arg
    """

    def __init__(self, repo, revid, parents, files):
        global files_cache

        self.updates = {}

        def copy_tree(revid):
            # Build the path table of an existing revision from the
            # repository and remember it in the cache.
            files = files_cache[revid] = {}
            repo.lock_read()
            try:
                # revision_tree() sits inside the try block so that a
                # failure there still releases the lock taken above
                tree = repo.repository.revision_tree(revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                repo.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            if not fid:
                # path unknown in the base revision: allocate a new file id
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit.

        Each tuple has the fields (file_id, (old_path, new_path), True,
        (old_versioned, new_versioned), (old_parent, new_parent),
        (old_name, new_name), (old_kind, new_kind), (old_exec, new_exec)).
        NOTE(review): presumably this is the layout bzrlib's
        record_iter_changes() expects -- confirm against the bzrlib docs.
        self.files and self.rev_files are updated along the way.
        """
        changes = []

        def get_parent(dirname, basename):
            # Return the file id of directory `dirname`, creating an entry
            # for it (and, recursively, its parents) when it is unknown.
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # `path` is the variable of the loop at the end of
            # iter_changes(), i.e. the file currently being processed
            fid = bzrlib.generate_ids.gen_file_id(path)
            add_entry(fid, dirname, 'directory')
            return fid

        def record_entry(fid, path, kind, mode, existed):
            # Shared body of add_entry() / update_entry(); they differ only
            # in the old path and the old 'versioned' flag.
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            if existed:
                paths, versioned = (path, path), (True, True)
            else:
                paths, versioned = (None, path), (False, True)

            change = (fid,
                    paths,
                    True,
                    versioned,
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def add_entry(fid, path, kind, mode = None):
            record_entry(fid, path, kind, mode, False)

        def update_entry(fid, path, kind, mode = None):
            record_entry(fid, path, kind, mode, True)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # remember which blob holds the new content
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object over the blob data, None)."""
        path, mark = self.rev_files[file_id]
        return (StringIO.StringIO(blob_marks[mark]), None)

    def get_symlink_target(self, file_id):
        """Return the blob data stored for `file_id`."""
        path, mark = self.rev_files[file_id]
        return blob_marks[mark]

    def id2path(self, file_id):
        path, mark = self.rev_files[file_id]
        return path
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string enclosed in double quotes is decoded with the 'string-escape'
    codec and stripped of the quotes; anything else is returned unchanged.
    The length check makes the empty string (previously an IndexError) and
    a lone '"' pass through untouched.
    """
    if len(string) >= 2 and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one commit command and create the matching bzr revision.

    Reads mark, author, committer, message, parents and the M/D file
    commands up to the terminating empty line, then records the revision
    through the branch's commit builder.  The new revision id is stored in
    `parsed_refs` under the ref and registered in `marks` under the
    commit's mark.  Only 'refs/heads/master' is accepted.
    """
    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # the author line is skipped: only the committer is recorded below
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline; guard against an empty message,
    # for which indexing the last character would fail
    if data and data[-1] == '\n':
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # split only once: the path may itself contain spaces
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # roll the half-built commit back, then let the error surface
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume a reset command for 'refs/heads/master'.

    A reset directly followed by a commit is handed to parse_commit(); a
    reset with a 'from' line points the ref at that mark's revision in
    `parsed_refs`; anything else leaves `parsed_refs` untouched.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    if parser.check('commit'):
        # ugh
        parse_commit(parser)
    elif parser.check('from'):
        from_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(from_mark)
def do_export(parser):
    """Process an incoming stream up to 'done' and report each ref.

    blob, commit and reset commands are parsed; tag and feature commands
    are ignored.  For 'refs/heads/master' the branch history is moved to
    the new revision, pushed to the global `peer` when there is one, and
    the working tree (if any) is updated.  One 'ok <ref>' or
    'error <ref> ...' line is printed per ref, followed by an empty line.
    """
    parser.next()

    handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
    )

    for line in parser.each_block('done'):
        for keyword, handler in handlers:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            ignored = parser.check('tag') or parser.check('feature')
            if not ignored:
                die('unhandled export command: %s' % line)

    branch = parser.repo

    for ref, revid in parsed_refs.iteritems():
        if ref == 'refs/heads/master':
            branch.generate_revision_history(revid, marks.get_tip('master'))
            if peer:
                try:
                    branch.push(peer, stop_revision=revid)
                except bzrlib.errors.DivergedBranches:
                    print("error %s non-fast forward" % ref)
                    continue

            try:
                branch.bzrdir.open_workingtree().update()
            except bzrlib.errors.NoWorkingTree:
                # nothing to update without a working tree
                pass

        print("ok %s" % ref)

    print("")
def do_capabilities(parser):
    """Print the helper's capabilities followed by an empty line.

    The marks options refer to the 'marks-git' file below the global
    `dirname`; import-marks is only announced when that file exists.
    """
    marks_file = os.path.join(dirname, 'marks-git')

    print("import")
    print("export")
    for kind in ('heads', 'tags'):
        print("refspec refs/%s/*:%s/%s/*" % (kind, prefix, kind))

    if os.path.exists(marks_file):
        print("*import-marks %s" % marks_file)
    print("*export-marks %s" % marks_file)

    print("")
def ref_is_valid(name):
    """Tell whether `name` is free of the characters '~', '^', ':', ' ', '\\'."""
    return not any(c in name for c in '~^: \\')
def do_list(parser):
    """Print the available refs: master, the usable tags, and HEAD.

    Tags whose revision is rejected by revision_id_to_dotted_revno() with
    NoSuchRevision, or whose name fails ref_is_valid(), are skipped; the
    others are remembered in the global `tags` dict.  The branch read lock
    is released in a 'finally' clause so an unexpected error cannot leave
    the branch locked.
    """
    print("? refs/heads/%s" % 'master')

    branch = parser.repo
    branch.lock_read()
    try:
        for tag, revid in branch.tags.get_tag_dict().items():
            try:
                branch.revision_id_to_dotted_revno(revid)
            except bzrlib.errors.NoSuchRevision:
                continue
            if not ref_is_valid(tag):
                continue
            print("? refs/tags/%s" % tag)
            tags[tag] = revid
    finally:
        branch.unlock()
    print("@refs/heads/%s HEAD" % 'master')
    print("")
def get_repo(url, alias):
    """Open the branch at `url` and return the branch to work on.

    For a local transport the opened branch is returned and the global
    `peer` is set to None.  Otherwise a clone below the global `dirname`
    is used: it is created (and bound to the remote branch) on first use
    and pulled from the remote afterwards, with `peer` set to the remote
    branch.  If the pull fails with DivergedBranches, the remote branch
    itself is returned and `peer` is None.  `alias` is unused.
    """
    global peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return branch

    remote_branch = branch
    clone_path = os.path.join(dirname, 'clone')

    if not os.path.exists(clone_path):
        # clone
        clone_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = clone_dir.open_branch()
        branch.bind(remote_branch)
    else:
        # pull
        clone_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        branch = clone_dir.open_branch()
        try:
            branch.pull(remote_branch, [], None, False)
        except bzrlib.errors.DivergedBranches:
            # use remote branch for now
            peer = None
            return remote_branch

    peer = remote_branch
    return branch
def fix_path(alias, orig_url):
    """Rewrite a relative file URL of remote `alias` as an absolute one.

    Nothing happens for URLs with a scheme other than 'file' or with an
    absolute path.  Otherwise the URL is joined onto the current working
    directory and stored via 'git config remote.<alias>.url'.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not relative_file:
        return

    abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url)
    subprocess.call(['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url])
def main(args):
    """Entry point: set up the global state and serve commands from stdin.

    args[1] is the remote alias and args[2] the URL.  When the alias with
    its first five characters removed equals the URL, the run is treated
    as temporary and the alias is replaced by its SHA-1 hex digest.  State
    lives in '<GIT_DIR>/bzr/<alias>'.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    alias = args[1]
    url = args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None

    is_tmp = alias[5:] == url
    if is_tmp:
        alias = hashlib.sha1(alias).hexdigest()

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for keyword, handler in commands:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()
def bye():
    """Exit hook: persist the marks, or drop the state of a temporary run.

    Does nothing while the global `marks` has not been set up.
    """
    if not marks:
        return
    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Register the exit hook first so it also runs when main() terminates
# the process through sys.exit().
atexit.register(bye)
exit_status = main(sys.argv)
sys.exit(exit_status)