remote-bzr: add support for shared repo
[git.git] / contrib / remote-helpers / git-remote-bzr
blob9fe830e3cdae5f5d24893d78e1283442806cdf7c
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
17 import sys
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
30 import bzrlib.urlutils
32 import sys
33 import os
34 import json
35 import re
36 import StringIO
37 import atexit, shutil, hashlib, urlparse, subprocess
# Patterns used to pick apart user identities.  They are raw strings so the
# regex escapes (\w, \d) never depend on Python's string-escape handling.

# Leading run of characters that contains neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# "name <mail>" where the name part (and the separating space) is optional.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# "<word> <name> <mail> <digits> <sign><digits>", matched against parser lines
# by Parser.get_author().
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an ``ERROR:`` line to stderr and exit with status 1.

    ``msg`` is %-interpolated with ``args`` only when arguments are given, so
    an already formatted message that happens to contain a ``%`` (for example
    a file path) is printed verbatim instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write a ``WARNING:`` line to stderr.

    ``msg`` is %-interpolated with ``args`` only when arguments are given, so
    a message containing a literal ``%`` can be passed as-is.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gittz(tz):
    """Format an offset in seconds as a ``+HHMM`` / ``-HHMM`` string.

    The sign is handled separately from the magnitude: floor division on a
    negative offset that is not a whole number of hours would otherwise round
    the hour away from zero (-12600 seconds must give "-0330", not "-0430").
    """
    sign = '-' if tz < 0 else '+'
    hours, remainder = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
class Marks:
    """Persistent mapping between revision ids and integer marks.

    State is kept in a JSON file at ``path`` with three keys: ``tips``
    (branch name -> tip revision), ``marks`` (revision -> mark) and
    ``last-mark`` (highest mark handed out so far).  ``rev_marks`` is the
    in-memory reverse mapping (mark -> revision) and is not stored.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the state from ``self.path``; do nothing if it does not exist."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaking the handle.
        with open(self.path) as f:
            tmp = json.load(f)
        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Rebuild the reverse lookup from the stored forward mapping.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the serialisable state."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to ``self.path`` as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of ``rev``; raises KeyError when it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision of ``mark``; raises KeyError when unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark not bound to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark, bind it to ``rev`` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse map in step, as new_mark() does, so to_rev() works
        # for marks handed out in this process as well.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Bind the externally chosen ``mark`` to ``rev``."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of ``branch`` or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader over standard input.

    ``self.line`` always holds the current (stripped) line; the helper
    methods interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield the current line, reading a new one after each step, until
        the current line equals ``separator``."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating the parser stops at the first empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a ``done`` line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """For a ``data <size>`` line read and return ``size`` characters from
        stdin; return None for any other line."""
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a numeric ``+HHMM`` / ``-HHMM`` offset to seconds.

        The sign applies to hours and minutes alike, so the magnitude is
        split first; floor division/modulo on the signed value would turn
        -0330 into -4h +70min.
        """
        sign = -1 if tz < 0 else 1
        hours, minutes = divmod(abs(tz), 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line with RAW_AUTHOR_RE.

        Returns ``('name <email>', timestamp, offset_in_seconds)`` or None
        when the line does not match.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        return (committer, int(date), self._tz_to_seconds(int(tz)))
def rev_to_mark(rev):
    """Look up the mark recorded for revision ``rev`` in the global marks."""
    # Only read access to the module-level ``marks`` is needed here.
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision recorded for ``mark`` in the global marks."""
    # Only read access to the module-level ``marks`` is needed here.
    rev = marks.to_rev(mark)
    return rev
def fixup_user(user):
    """Normalise a user string to ``name <mail>``.

    Double quotes are dropped first.  ``name <mail>`` input is split with
    AUTHOR_RE; anything else is treated as a bare name (NAME_RE).  A part
    that cannot be determined is replaced by ``unknown`` so the literal text
    ``None`` never ends up in the result.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        # group(1) is None when the input is just "<mail>"
        name = (m.group(1) or '').strip()
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    if not mail:
        mail = 'unknown'

    return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
    """Summarise the differences between tree ``cur`` and tree ``prev``.

    Returns ``(modified, removed)``.  ``modified`` maps UTF-8 encoded paths
    to the id reported for them, ``removed`` maps UTF-8 encoded paths to
    None.  A rename shows up as a removal of the old path plus a
    modification of the new one; for a renamed directory every
    non-directory entry below the new path is listed individually.
    """
    def utf8(text):
        return text.encode('utf-8')

    touched = {}
    gone = {}

    delta = cur.changes_from(prev)

    for path, file_id, _kind in delta.added:
        touched[utf8(path)] = file_id
    for path, _file_id, _kind in delta.removed:
        gone[utf8(path)] = None
    for path, file_id, _kind, _mod, _meta in delta.modified:
        touched[utf8(path)] = file_id
    for old_path, new_path, file_id, kind, _mod, _meta in delta.renamed:
        gone[utf8(old_path)] = None
        if kind != 'directory':
            touched[utf8(new_path)] = file_id
            continue
        # A renamed directory moves everything beneath it.
        listing = cur.list_files(from_dir=new_path, recursive=True)
        for sub_path, _file_class, sub_kind, sub_id, _entry in listing:
            if sub_kind != 'directory':
                touched[utf8(new_path + '/' + sub_path)] = sub_id

    return touched, gone
211 def export_files(tree, files):
212 global marks, filenodes
214 final = []
215 for path, fid in files.iteritems():
216 kind = tree.kind(fid)
218 h = tree.get_file_sha1(fid)
220 if kind == 'symlink':
221 d = tree.get_symlink_target(fid)
222 mode = '120000'
223 elif kind == 'file':
225 if tree.is_executable(fid):
226 mode = '100755'
227 else:
228 mode = '100644'
230 # is the blob already exported?
231 if h in filenodes:
232 mark = filenodes[h]
233 final.append((mode, mark, path))
234 continue
236 d = tree.get_file_text(fid)
237 elif kind == 'directory':
238 continue
239 else:
240 die("Unhandled kind '%s' for path '%s'" % (kind, path))
242 mark = marks.next_mark()
243 filenodes[h] = mark
245 print "blob"
246 print "mark :%u" % mark
247 print "data %d" % len(d)
248 print d
250 final.append((mode, mark, path))
252 return final
254 def export_branch(repo, name):
255 global prefix
257 ref = '%s/heads/%s' % (prefix, name)
258 tip = marks.get_tip(name)
260 branch = branches[name]
261 repo = branch.repository
263 branch.lock_read()
264 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
265 count = 0
267 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
269 for revid in revs:
271 rev = repo.get_revision(revid)
273 parents = rev.parent_ids
274 time = rev.timestamp
275 tz = rev.timezone
276 committer = rev.committer.encode('utf-8')
277 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
278 authors = rev.get_apparent_authors()
279 if authors:
280 author = authors[0].encode('utf-8')
281 author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
282 else:
283 author = committer
284 msg = rev.message.encode('utf-8')
286 msg += '\n'
288 if len(parents) == 0:
289 parent = bzrlib.revision.NULL_REVISION
290 else:
291 parent = parents[0]
293 cur_tree = repo.revision_tree(revid)
294 prev = repo.revision_tree(parent)
295 modified, removed = get_filechanges(cur_tree, prev)
297 modified_final = export_files(cur_tree, modified)
299 if len(parents) == 0:
300 print 'reset %s' % ref
302 print "commit %s" % ref
303 print "mark :%d" % (marks.get_mark(revid))
304 print "author %s" % (author)
305 print "committer %s" % (committer)
306 print "data %d" % (len(msg))
307 print msg
309 for i, p in enumerate(parents):
310 try:
311 m = rev_to_mark(p)
312 except KeyError:
313 # ghost?
314 continue
315 if i == 0:
316 print "from :%s" % m
317 else:
318 print "merge :%s" % m
320 for f in removed:
321 print "D %s" % (f,)
322 for f in modified_final:
323 print "M %s :%u %s" % f
324 print
326 count += 1
327 if (count % 100 == 0):
328 print "progress revision %s '%s' (%d/%d)" % (revid, name, count, len(revs))
329 print "#############################################################"
331 branch.unlock()
333 revid = branch.last_revision()
335 # make sure the ref is updated
336 print "reset %s" % ref
337 print "from :%u" % rev_to_mark(revid)
338 print
340 marks.set_tip(name, revid)
342 def export_tag(repo, name):
343 global tags, prefix
345 ref = '%s/tags/%s' % (prefix, name)
346 print "reset %s" % ref
347 print "from :%u" % rev_to_mark(tags[name])
348 print
350 def do_import(parser):
351 global dirname
353 repo = parser.repo
354 path = os.path.join(dirname, 'marks-git')
356 print "feature done"
357 if os.path.exists(path):
358 print "feature import-marks=%s" % path
359 print "feature export-marks=%s" % path
360 print "feature force"
361 sys.stdout.flush()
363 while parser.check('import'):
364 ref = parser[1]
365 if ref.startswith('refs/heads/'):
366 name = ref[len('refs/heads/'):]
367 export_branch(repo, name)
368 if ref.startswith('refs/tags/'):
369 name = ref[len('refs/tags/'):]
370 export_tag(repo, name)
371 parser.next()
373 print 'done'
375 sys.stdout.flush()
def parse_blob(parser):
    """Consume one blob and remember its content in the global
    ``blob_marks`` under the blob's mark."""
    parser.next()                       # move to the mark line
    blob_id = parser.get_mark()
    parser.next()                       # move to the data line
    blob_marks[blob_id] = parser.get_data()
    parser.next()                       # step past the data
class CustomTree():
    """In-memory tree describing one incoming commit.

    parse_commit() builds an instance and hands it, together with the
    result of iter_changes(), to the commit builder.

    Attributes set up by __init__:
      branch     -- the branch being committed to
      base_id    -- first parent revision id (NULL_REVISION when parentless)
      base_files -- {path: [file_id, mark]} of the base revision
      files      -- {path: [file_id, mark]} of this revision (also stored in
                    the global files_cache under ``revid``)
      rev_files  -- {file_id: [path, mark]} reverse lookup
      updates    -- {file_id: change} where change is the per-file dict from
                    ``files`` with a 'path' key added
    """

    def __init__(self, branch, revid, parents, files):
        # ``files`` maps path -> dict; parse_commit() supplies either
        # {'mode': ..., 'mark': ...} or {'deleted': True}.
        global files_cache

        self.updates = {}
        self.branch = branch

        def copy_tree(revid):
            # Populate files_cache[revid] from the repository's tree; marks
            # are unknown for these entries, hence None.
            files = files_cache[revid] = {}
            branch.lock_read()
            tree = branch.repository.revision_tree(revid)
            try:
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                branch.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            # Missing (or empty) cache entry: read the tree from the repository.
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        # Shallow copy: the [file_id, mark] lists are shared with the base
        # until an entry is replaced in iter_changes().
        self.files = files_cache[revid] = self.base_files.copy()
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            # Paths not present in the base get a freshly generated file id.
            if not fid:
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        """Return the base (first parent) revision id."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit.

        Each tuple has eight fields; it is passed to
        builder.record_iter_changes() by parse_commit(), so the layout is
        presumably bzrlib's iter_changes format (file_id, paths,
        changed_content, versioned, parent, name, kind, executable) --
        TODO confirm against bzrlib.

        As a side effect ``self.files`` and ``self.rev_files`` are updated
        to reflect the changes.
        """
        changes = []

        def get_parent(dirname, basename):
            # Return the file id of directory ``dirname``, looking in the
            # base tree first, then in this tree.  When it is unknown and
            # ``basename`` is not empty, a directory entry is added for it.
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # NOTE(review): ``path`` is not a local here; it resolves to the
            # enclosing iter_changes() loop variable (the file being
            # processed), not to ``dirname`` -- confirm this is intended.
            fid = bzrlib.generate_ids.gen_file_id(path)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            # Record ``path`` as newly added (no old path, not previously
            # versioned).
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def update_entry(fid, path, kind, mode = None):
            # Record ``path`` as changed in place (same old and new path).
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def remove_entry(fid, path, kind):
            # Record ``path`` as removed and drop it from self.files.
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            # Known in the base tree -> update, otherwise -> add.
            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # Remember which blob mark holds the new content.
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_content(self, file_id):
        """Return the content of ``file_id``: the blob stored in the global
        blob_marks when a mark is known, else the text from the base
        revision's tree."""
        path, mark = self.rev_files[file_id]
        if mark:
            return blob_marks[mark]

        # last resort
        tree = self.branch.repository.revision_tree(self.base_id)
        return tree.get_file_text(file_id)

    def get_file_with_stat(self, file_id, path=None):
        """Return ``(file-like object over the content, None)``."""
        content = self.get_content(file_id)
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the content of ``file_id`` (used as the link target)."""
        return self.get_content(file_id)

    def id2path(self, file_id):
        """Return the path recorded for ``file_id``."""
        path, mark = self.rev_files[file_id]
        return path
def c_style_unescape(string):
    """Undo C-style quoting of ``string``.

    A value that starts and ends with a double quote is decoded with the
    ``string-escape`` codec and stripped of the surrounding quotes; anything
    else, including the empty string, is returned unchanged.
    """
    # Slices instead of indexes so an empty string does not raise IndexError.
    if string[:1] == string[-1:] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one ``commit`` command and create the matching revision.

    Reads the mark, author, committer, message, parents and the ``M`` / ``D``
    file commands, builds a CustomTree from them and commits through the
    branch's commit builder.  On success the new revision id is recorded in
    the global ``parsed_refs`` for the ref and bound to the commit's mark.
    Only refs below ``refs/heads/`` are accepted.
    """
    parents = []

    ref = parser[1]
    parser.next()

    if ref.startswith('refs/heads/'):
        name = ref[len('refs/heads/'):]
        branch = branches[name]
    else:
        die('unknown ref')

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (guarded so an empty commit message does not raise IndexError)
    if data and data[-1] == '\n':
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = branch.nick

    mtree = CustomTree(branch, revid, parents, files)
    changes = mtree.iter_changes()

    branch.lock_write()
    try:
        builder = branch.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Roll the half-built commit back, then let the error propagate.
            builder.abort()
            raise
    finally:
        branch.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume one ``reset`` command.

    A reset directly followed by a commit is delegated to parse_commit().
    Otherwise, when a ``from`` line follows, the ref is recorded in the
    global ``parsed_refs`` with the revision of the referenced mark.
    """
    ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
        return

    if parser.check('from'):
        target_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(target_mark)
638 def do_export(parser):
639 global parsed_refs, dirname
641 parser.next()
643 for line in parser.each_block('done'):
644 if parser.check('blob'):
645 parse_blob(parser)
646 elif parser.check('commit'):
647 parse_commit(parser)
648 elif parser.check('reset'):
649 parse_reset(parser)
650 elif parser.check('tag'):
651 pass
652 elif parser.check('feature'):
653 pass
654 else:
655 die('unhandled export command: %s' % line)
657 for ref, revid in parsed_refs.iteritems():
658 name = ref[len('refs/heads/'):]
659 branch = branches[name]
660 branch.generate_revision_history(revid, marks.get_tip(name))
662 if name in peers:
663 peer = peers[name]
664 try:
665 peer.bzrdir.push_branch(branch, revision_id=revid)
666 except bzrlib.errors.DivergedBranches:
667 print "error %s non-fast forward" % ref
668 continue
670 try:
671 wt = branch.bzrdir.open_workingtree()
672 wt.update()
673 except bzrlib.errors.NoWorkingTree:
674 pass
676 print "ok %s" % ref
678 print
680 def do_capabilities(parser):
681 global dirname
683 print "import"
684 print "export"
685 print "refspec refs/heads/*:%s/heads/*" % prefix
686 print "refspec refs/tags/*:%s/tags/*" % prefix
688 path = os.path.join(dirname, 'marks-git')
690 if os.path.exists(path):
691 print "*import-marks %s" % path
692 print "*export-marks %s" % path
694 print
def ref_is_valid(name):
    """Return True unless ``name`` contains one of ``~ ^ :`` space or
    backslash."""
    forbidden = '~^: \\'
    return not any(char in name for char in forbidden)
699 def do_list(parser):
700 global tags
702 master_branch = None
704 for name in branches:
705 if not master_branch:
706 master_branch = name
707 print "? refs/heads/%s" % name
709 branch = branches[master_branch]
710 branch.lock_read()
711 for tag, revid in branch.tags.get_tag_dict().items():
712 try:
713 branch.revision_id_to_dotted_revno(revid)
714 except bzrlib.errors.NoSuchRevision:
715 continue
716 if not ref_is_valid(tag):
717 continue
718 print "? refs/tags/%s" % tag
719 tags[tag] = revid
720 branch.unlock()
722 print "@refs/heads/%s HEAD" % master_branch
723 print
def get_remote_branch(origin, remote_branch, name):
    """Return a local copy of ``remote_branch`` kept below ``dirname``/clone.

    An existing copy is refreshed by pulling; when the branches have
    diverged the remote branch itself is returned.  Otherwise a new copy is
    created from ``origin``.
    """
    local_path = os.path.join(dirname, 'clone', name)

    if not os.path.exists(local_path):
        # clone
        new_dir = origin.sprout(local_path, None,
                                hardlink=True, create_tree_if_local=False,
                                force_new_repo=False,
                                source_branch=remote_branch)
        return new_dir.open_branch()

    # pull
    local_dir = bzrlib.bzrdir.BzrDir.open(local_path)
    local_branch = local_dir.open_branch()
    try:
        local_branch.pull(remote_branch, [], None, False)
    except bzrlib.errors.DivergedBranches:
        # use remote branch for now
        return remote_branch
    return local_branch
def get_repo(url, alias):
    """Open the location at ``url`` and register its branches.

    Makes sure a shared repository exists below ``gitdir``/bzr, fills the
    global ``branches`` (and ``peers`` for non-local locations) and returns
    the repository.  A location without a repository of its own is treated
    as a single branch called ``master``.
    """
    def track(branch_name, found):
        # Local branches are used directly; remote ones are remembered as
        # peers and worked on through a local copy.
        if is_local:
            branches[branch_name] = found
        else:
            peers[branch_name] = found
            branches[branch_name] = get_remote_branch(origin, found, branch_name)

    # The normalised URL is not used further; the call is kept as-is.
    normal_url = bzrlib.urlutils.normalize_url(url)
    origin = bzrlib.bzrdir.BzrDir.open(url)
    is_local = isinstance(origin.transport, bzrlib.transport.local.LocalTransport)

    # Open the shared control directory and repository, creating each one
    # when it is missing.
    shared_path = os.path.join(gitdir, 'bzr')
    try:
        shared_dir = bzrlib.bzrdir.BzrDir.open(shared_path)
    except bzrlib.errors.NotBranchError:
        shared_dir = bzrlib.bzrdir.BzrDir.create(shared_path)
    try:
        shared_repo = shared_dir.open_repository()
    except bzrlib.errors.NoRepositoryPresent:
        shared_repo = shared_dir.create_repository(shared=True)

    if not is_local:
        clone_path = os.path.join(dirname, 'clone')
        if not os.path.exists(clone_path):
            os.mkdir(clone_path)

    try:
        repo = origin.open_repository()
    except bzrlib.errors.NoRepositoryPresent:
        # branch
        branch = origin.open_branch()
        track('master', branch)
        return branch.repository
    else:
        # repository
        for branch in repo.find_branches():
            name = repo.user_transport.relpath(branch.base)
            if name == '':
                name = 'master'
            track(name.replace('/', '+'), branch)

        return repo
def fix_path(alias, orig_url):
    """Rewrite the configured URL of remote ``alias`` to an absolute one.

    Only schemeless/``file`` URLs with a relative path are touched; the
    new value is ``bzr::`` plus the URL joined onto the current working
    directory, written with ``git config``.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    is_relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not is_relative_file:
        return

    base = "%s/" % os.getcwd()
    absolute = urlparse.urljoin(base, orig_url)
    subprocess.call(['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % absolute])
def main(args):
    """Entry point: ``args`` is ``[program, alias, url]``.

    Sets up the module-level state, opens the repository and then serves
    commands read from stdin (``capabilities``, ``list``, ``import``,
    ``export``) until an empty line or end of input.
    """
    global marks, prefix, gitdir, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp
    global branches, peers

    # Initialise everything the atexit hook bye() reads before anything can
    # fail, so an early exit does not end in a NameError there.
    marks = None
    is_tmp = False

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    branches = {}
    peers = {}

    if len(args) < 3:
        die('expected arguments: <alias> <url>')

    alias = args[1]
    url = args[2]

    # No configured remote: the alias is "bzr::<url>"; use a hash of it as a
    # directory name and clean up on exit.
    if alias[5:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks_path = os.path.join(dirname, 'marks-int')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: once marks exist, store them for a configured remote or
    delete the working directory of a temporary one."""
    if marks:
        if is_tmp:
            shutil.rmtree(dirname)
        else:
            marks.store()
# Register bye() to run at interpreter exit, then run main() and use its
# return value as the exit status.
atexit.register(bye)
sys.exit(main(sys.argv))