remote-bzr: add option to specify branches
[git/mingw/j6t.git] / contrib / remote-helpers / git-remote-bzr
blobd284afc0d3efe3a47216f0addd5b0da0c03e8096
#!/usr/bin/env python
#
# Copyright (c) 2012 Felipe Contreras
#
# Just copy to your ~/bin, or anywhere in your $PATH.
# Then you can clone with:
# % git clone bzr::/path/to/bzr/repo/or/url
#
# For example:
# % git clone bzr::$HOME/myrepo
# or
# % git clone bzr::lp:myrepo
#
# If you want to specify which branches you want to track (per repo):
# git config remote-bzr.branches 'trunk, devel, test'
#
20 import sys
22 import bzrlib
23 if hasattr(bzrlib, "initialize"):
24 bzrlib.initialize()
26 import bzrlib.plugin
27 bzrlib.plugin.load_plugins()
29 import bzrlib.generate_ids
30 import bzrlib.transport
31 import bzrlib.errors
32 import bzrlib.ui
33 import bzrlib.urlutils
35 import sys
36 import os
37 import json
38 import re
39 import StringIO
40 import atexit, shutil, hashlib, urlparse, subprocess
# Patterns used by fixup_user() to split a bzr user string into a name and an
# e-mail address; they are tried in the order AUTHOR_RE, EMAIL_RE, NAME_RE.
# Raw strings keep the regex escapes (\b, \t, \w, \d) out of Python's own
# string-escape processing.
NAME_RE = re.compile(r'^([^<>]+)')
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
EMAIL_RE = re.compile(r'^([^<>]+[^ \t<>])?\b(?:[ \t<>]*?)\b([^ \t<>]+@[^ \t<>]+)')

# One "<keyword> [<name>] <<email>> <seconds> <+hhmm|-hhmm>" line, as matched
# by Parser.get_author().
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an ``ERROR: ...`` line to stderr and exit with status 1.

    `msg` is %-formatted with `args` only when `args` were actually given.
    Callers in this file usually pass an already formatted message, which may
    legitimately contain a literal '%' (e.g. inside a path or a command line).
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write a ``WARNING: ...`` line to stderr.

    `msg` is %-formatted with `args` only when `args` were actually given, so
    an already formatted message containing a literal '%' is printed as is.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gittz(tz):
    """Format a UTC offset given in seconds as a "+hhmm" / "-hhmm" string.

    The sign is handled separately from the magnitude: floor division and
    modulo on a negative number round away from zero, which would turn
    -12600 (-03:30) into "-0430".
    """
    sign = '-' if tz < 0 else '+'
    hours, seconds = divmod(abs(int(tz)), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
def get_config(config):
    """Return whatever ``git config --get <config>`` writes to stdout."""
    git = subprocess.Popen(['git', 'config', '--get', config],
                           stdout=subprocess.PIPE)
    stdout = git.communicate()[0]
    return stdout
class Marks:
    """Persistent mapping between revision ids and integer marks.

    State is kept in a JSON file at `path` holding three entries:
    'tips' (branch name -> tip revision), 'marks' (revision -> mark) and
    'last-mark' (the highest mark handed out so far). `rev_marks` is the
    in-memory inverse of `marks` and is rebuilt on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the state from `self.path`; do nothing if it does not exist."""
        if not os.path.exists(self.path):
            return

        # close the file deterministically instead of relying on the GC
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state in the layout that is written to disk."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to `self.path` as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of `rev`; raises KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision behind `mark`; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a new mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a new mark for `rev`, record it and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # keep the inverse table in sync so to_rev() also works for marks
        # that were created during this run
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev`."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of `branch`, or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the commands arriving on stdin.

    `self.line` always holds the current (whitespace-stripped) line; the
    helper methods below interpret that line.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and strip surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until one equals `separator`, advancing after each."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # a block ends at the first empty line
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """For a 'data <size>' line read and return <size> bytes from stdin.

        Returns None when the current line is not a 'data' line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an author/committer line.

        Returns ('<name> <<email>>', seconds, utc_offset_in_seconds), or None
        when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        # tz is "+hhmm" or "-hhmm". Split the sign off before doing the
        # arithmetic: floor division/modulo on a negative hhmm value would
        # turn -0330 into -02:50.
        sign = -1 if tz[0] == '-' else 1
        hours, minutes = divmod(int(tz[1:]), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (committer, int(date), tz)
def rev_to_mark(rev):
    """Look up the mark stored for `rev` in the module-wide marks table."""
    return marks.from_rev(rev)
def mark_to_rev(mark):
    """Look up the revision stored for `mark` in the module-wide marks table."""
    return marks.to_rev(mark)
def fixup_user(user):
    """Normalise a user string to the form ``name <mail>``.

    Double quotes are dropped, then the string is matched against AUTHOR_RE,
    EMAIL_RE and NAME_RE in that order; the first pattern that matches
    decides name and mail. A missing name becomes 'unknown', a missing mail
    becomes 'Unknown'.
    """
    user = user.replace('"', '')
    name = mail = None

    by_author = AUTHOR_RE.match(user)
    if by_author:
        name = by_author.group(1)
        mail = by_author.group(2).strip()
    else:
        by_email = EMAIL_RE.match(user)
        if by_email:
            name, mail = by_email.group(1), by_email.group(2)
        else:
            by_name = NAME_RE.match(user)
            if by_name:
                name = by_name.group(1).strip()

    return '%s <%s>' % (name or 'unknown', mail or 'Unknown')
def get_filechanges(cur, prev):
    """Summarise the difference between two trees.

    Returns (modified, removed): `modified` maps UTF-8 encoded paths to file
    ids for everything added, modified or moved in `cur` relative to `prev`;
    `removed` maps UTF-8 encoded paths that no longer exist to None.
    """
    delta = cur.changes_from(prev)
    modified, removed = {}, {}

    def utf8(text):
        return text.encode('utf-8')

    for path, fid, _kind in delta.added:
        modified[utf8(path)] = fid

    for path, _fid, _kind in delta.removed:
        removed[utf8(path)] = None

    for path, fid, _kind, _mod, _meta in delta.modified:
        modified[utf8(path)] = fid

    for oldpath, newpath, fid, kind, _mod, _meta in delta.renamed:
        removed[utf8(oldpath)] = None
        if kind != 'directory':
            modified[utf8(newpath)] = fid
            continue
        # a renamed directory: re-add every non-directory entry below it
        children = cur.list_files(from_dir=newpath, recursive=True)
        for sub, _file_class, sub_kind, sub_fid, _entry in children:
            if sub_kind != 'directory':
                modified[utf8(newpath + '/' + sub)] = sub_fid

    return modified, removed
231 def export_files(tree, files):
232 global marks, filenodes
234 final = []
235 for path, fid in files.iteritems():
236 kind = tree.kind(fid)
238 h = tree.get_file_sha1(fid)
240 if kind == 'symlink':
241 d = tree.get_symlink_target(fid)
242 mode = '120000'
243 elif kind == 'file':
245 if tree.is_executable(fid):
246 mode = '100755'
247 else:
248 mode = '100644'
250 # is the blob already exported?
251 if h in filenodes:
252 mark = filenodes[h]
253 final.append((mode, mark, path))
254 continue
256 d = tree.get_file_text(fid)
257 elif kind == 'directory':
258 continue
259 else:
260 die("Unhandled kind '%s' for path '%s'" % (kind, path))
262 mark = marks.next_mark()
263 filenodes[h] = mark
265 print "blob"
266 print "mark :%u" % mark
267 print "data %d" % len(d)
268 print d
270 final.append((mode, mark, path))
272 return final
274 def export_branch(repo, name):
275 global prefix
277 ref = '%s/heads/%s' % (prefix, name)
278 tip = marks.get_tip(name)
280 branch = branches[name]
281 repo = branch.repository
283 branch.lock_read()
284 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
285 count = 0
287 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
289 for revid in revs:
291 rev = repo.get_revision(revid)
293 parents = rev.parent_ids
294 time = rev.timestamp
295 tz = rev.timezone
296 committer = rev.committer.encode('utf-8')
297 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
298 authors = rev.get_apparent_authors()
299 if authors:
300 author = authors[0].encode('utf-8')
301 author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
302 else:
303 author = committer
304 msg = rev.message.encode('utf-8')
306 msg += '\n'
308 if len(parents) == 0:
309 parent = bzrlib.revision.NULL_REVISION
310 else:
311 parent = parents[0]
313 cur_tree = repo.revision_tree(revid)
314 prev = repo.revision_tree(parent)
315 modified, removed = get_filechanges(cur_tree, prev)
317 modified_final = export_files(cur_tree, modified)
319 if len(parents) == 0:
320 print 'reset %s' % ref
322 print "commit %s" % ref
323 print "mark :%d" % (marks.get_mark(revid))
324 print "author %s" % (author)
325 print "committer %s" % (committer)
326 print "data %d" % (len(msg))
327 print msg
329 for i, p in enumerate(parents):
330 try:
331 m = rev_to_mark(p)
332 except KeyError:
333 # ghost?
334 continue
335 if i == 0:
336 print "from :%s" % m
337 else:
338 print "merge :%s" % m
340 for f in removed:
341 print "D %s" % (f,)
342 for f in modified_final:
343 print "M %s :%u %s" % f
344 print
346 count += 1
347 if (count % 100 == 0):
348 print "progress revision %s '%s' (%d/%d)" % (revid, name, count, len(revs))
349 print "#############################################################"
351 branch.unlock()
353 revid = branch.last_revision()
355 # make sure the ref is updated
356 print "reset %s" % ref
357 print "from :%u" % rev_to_mark(revid)
358 print
360 marks.set_tip(name, revid)
362 def export_tag(repo, name):
363 global tags, prefix
365 ref = '%s/tags/%s' % (prefix, name)
366 print "reset %s" % ref
367 print "from :%u" % rev_to_mark(tags[name])
368 print
370 def do_import(parser):
371 global dirname
373 repo = parser.repo
374 path = os.path.join(dirname, 'marks-git')
376 print "feature done"
377 if os.path.exists(path):
378 print "feature import-marks=%s" % path
379 print "feature export-marks=%s" % path
380 print "feature force"
381 sys.stdout.flush()
383 while parser.check('import'):
384 ref = parser[1]
385 if ref.startswith('refs/heads/'):
386 name = ref[len('refs/heads/'):]
387 export_branch(repo, name)
388 if ref.startswith('refs/tags/'):
389 name = ref[len('refs/tags/'):]
390 export_tag(repo, name)
391 parser.next()
393 print 'done'
395 sys.stdout.flush()
def parse_blob(parser):
    """Consume one blob command and remember its data under its mark."""
    global blob_marks

    parser.next()                  # move to the line carrying the mark
    mark = parser.get_mark()
    parser.next()                  # move to the 'data <size>' line
    blob_marks[mark] = parser.get_data()
    parser.next()
class CustomTree():
    """In-memory tree describing one incoming commit.

    It is built from the file list of the first parent (taken from
    `files_cache` or read from the repository) plus the `files` dict parsed
    from the commit, and offers the methods this script hands to the bzr
    commit builder: last_revision(), iter_changes(), get_file_with_stat(),
    get_symlink_target() and id2path().
    """

    def __init__(self, branch, revid, parents, files):
        """
        branch  -- branch the commit goes to
        revid   -- revision id of the new commit (key into files_cache)
        parents -- parent revision ids; only the first one is the base
        files   -- path -> {'mode', 'mark'} or {'deleted'} from the commit
        """
        global files_cache

        self.updates = {}
        self.branch = branch

        def copy_tree(revid):
            # read path -> [file_id, None] for every entry of `revid`
            files = files_cache[revid] = {}
            branch.lock_read()
            try:
                tree = branch.repository.revision_tree(revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                branch.unlock()
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)

        # path -> [file_id, mark] of the new revision, starting from the base
        self.files = files_cache[revid] = self.base_files.copy()
        # file_id -> [path, mark], the inverse view
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            if not fid:
                # path is new in this commit: give it a fresh file id
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit.

        Each tuple is (file_id, (old_path, new_path), True,
        (old_versioned, new_versioned), (old_parent, new_parent),
        (old_name, new_name), (old_kind, new_kind), (old_exec, new_exec)).
        NOTE(review): the layout presumably mirrors what bzrlib's
        record_iter_changes() expects -- confirm against bzrlib.
        """
        changes = []

        def get_parent(dirname, basename):
            # file id of directory `dirname`, creating the directory entry
            # (and, recursively, its own parents) when it does not exist yet
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # name the id after the directory being created, not after
            # whatever file happens to be processed by the caller
            fid = bzrlib.generate_ids.gen_file_id(dirname)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # remember which blob holds the new content
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_content(self, file_id):
        """Return the content of `file_id`.

        Uses the blob received for it if there is one, otherwise reads the
        file from the base revision.
        """
        path, mark = self.rev_files[file_id]
        if mark:
            return blob_marks[mark]

        # last resort
        tree = self.branch.repository.revision_tree(self.base_id)
        return tree.get_file_text(file_id)

    def get_file_with_stat(self, file_id, path=None):
        content = self.get_content(file_id)
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        return self.get_content(file_id)

    def id2path(self, file_id):
        path, mark = self.rev_files[file_id]
        return path
def c_style_unescape(string):
    """Undo C-style quoting of `string`.

    A string that starts and ends with a double quote is string-escape
    decoded and returned without the quotes; anything else (including the
    empty string) is returned unchanged.
    """
    if string.startswith('"') and string.endswith('"'):
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one commit command and create the matching bzr revision.

    Reads mark, author, committer, message, parents and file changes from
    `parser`, commits them to the branch named by the ref (only refs/heads/*
    is accepted), and records the new revision id in `parsed_refs` and
    `marks`.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref.startswith('refs/heads/'):
        name = ref[len('refs/heads/'):]
        branch = branches[name]
    else:
        die('unknown ref')

    commit_mark = parser.get_mark()
    parser.next()
    # the author line is consumed but only the committer is used below
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (endswith() also copes with an empty message)
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # split only once: the path itself may contain spaces
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s' % line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = branch.nick

    mtree = CustomTree(branch, revid, parents, files)
    changes = mtree.iter_changes()

    branch.lock_write()
    try:
        builder = branch.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # roll the half-built commit back, then let the error propagate
            builder.abort()
            raise
    finally:
        branch.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume one reset command.

    When the reset is directly followed by a commit, that commit is parsed
    instead; when it carries a 'from' line, the ref is recorded in
    `parsed_refs` with the revision behind that mark; otherwise nothing is
    recorded.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(mark)
658 def do_export(parser):
659 global parsed_refs, dirname
661 parser.next()
663 for line in parser.each_block('done'):
664 if parser.check('blob'):
665 parse_blob(parser)
666 elif parser.check('commit'):
667 parse_commit(parser)
668 elif parser.check('reset'):
669 parse_reset(parser)
670 elif parser.check('tag'):
671 pass
672 elif parser.check('feature'):
673 pass
674 else:
675 die('unhandled export command: %s' % line)
677 for ref, revid in parsed_refs.iteritems():
678 name = ref[len('refs/heads/'):]
679 branch = branches[name]
680 branch.generate_revision_history(revid, marks.get_tip(name))
682 if name in peers:
683 peer = peers[name]
684 try:
685 peer.bzrdir.push_branch(branch, revision_id=revid)
686 except bzrlib.errors.DivergedBranches:
687 print "error %s non-fast forward" % ref
688 continue
690 try:
691 wt = branch.bzrdir.open_workingtree()
692 wt.update()
693 except bzrlib.errors.NoWorkingTree:
694 pass
696 print "ok %s" % ref
698 print
700 def do_capabilities(parser):
701 global dirname
703 print "import"
704 print "export"
705 print "refspec refs/heads/*:%s/heads/*" % prefix
706 print "refspec refs/tags/*:%s/tags/*" % prefix
708 path = os.path.join(dirname, 'marks-git')
710 if os.path.exists(path):
711 print "*import-marks %s" % path
712 print "*export-marks %s" % path
714 print
def ref_is_valid(name):
    """Tell whether `name` is free of the characters '~', '^', ':', ' ' and '\\'."""
    for forbidden in '~^: \\':
        if forbidden in name:
            return False
    return True
719 def do_list(parser):
720 global tags
722 master_branch = None
724 for name in branches:
725 if not master_branch:
726 master_branch = name
727 print "? refs/heads/%s" % name
729 branch = branches[master_branch]
730 branch.lock_read()
731 for tag, revid in branch.tags.get_tag_dict().items():
732 try:
733 branch.revision_id_to_dotted_revno(revid)
734 except bzrlib.errors.NoSuchRevision:
735 continue
736 if not ref_is_valid(tag):
737 continue
738 print "? refs/tags/%s" % tag
739 tags[tag] = revid
740 branch.unlock()
742 print "@refs/heads/%s HEAD" % master_branch
743 print
def get_remote_branch(origin, remote_branch, name):
    """Return a local mirror of `remote_branch` kept under <dirname>/clone/<name>.

    An existing mirror is updated by pulling; if the pull reports diverged
    branches, `remote_branch` itself is returned. A missing mirror is created
    by sprouting from `origin`.
    """
    global dirname, peers

    branch_path = os.path.join(dirname, 'clone', name)

    if not os.path.exists(branch_path):
        # clone
        d = origin.sprout(branch_path, None,
                hardlink=True, create_tree_if_local=False,
                force_new_repo=False,
                source_branch=remote_branch)
        return d.open_branch()

    # pull
    d = bzrlib.bzrdir.BzrDir.open(branch_path)
    branch = d.open_branch()
    try:
        branch.pull(remote_branch, [], None, False)
    except bzrlib.errors.DivergedBranches:
        # use remote branch for now
        return remote_branch
    return branch
def find_branches(repo, wanted):
    """Yield (name, branch) for the branches found below `repo`.

    A branch is detected by its '.bzr/branch-format' file. Its name is the
    directory path with '/' replaced by '+', or 'master' for the top-level
    directory. When `wanted` is non-empty only names listed in it are
    yielded.
    """
    transport = repo.user_transport

    for fn in transport.iter_files_recursive():
        if not fn.endswith('.bzr/branch-format'):
            continue

        subdir = fn[:-len('/.bzr/branch-format')]
        name = (subdir or 'master').replace('/', '+')

        if wanted and name not in wanted:
            continue

        try:
            branch = bzrlib.branch.Branch.open_from_transport(transport.clone(subdir))
        except bzrlib.errors.NotBranchError:
            continue
        yield name, branch
def get_repo(url, alias):
    """Open the bzr location at `url` and register its branches.

    Makes sure a shared repository exists in <gitdir>/bzr and, for non-local
    locations, that <dirname>/clone exists. If `url` is a single branch it
    is registered as 'master'; if it is a repository, every branch found in
    it (restricted to the 'remote-bzr.branches' setting when that is set) is
    registered. For non-local locations the remote branch goes to `peers`
    and a local mirror to `branches`; otherwise the branch itself goes to
    `branches`.

    Returns the repository.
    """
    global dirname, peers, branches

    origin = bzrlib.bzrdir.BzrDir.open(url)
    is_local = isinstance(origin.transport, bzrlib.transport.local.LocalTransport)

    # only the side effect matters here: create the shared repository on
    # first use
    shared_path = os.path.join(gitdir, 'bzr')
    try:
        shared_dir = bzrlib.bzrdir.BzrDir.open(shared_path)
    except bzrlib.errors.NotBranchError:
        shared_dir = bzrlib.bzrdir.BzrDir.create(shared_path)
    try:
        shared_dir.open_repository()
    except bzrlib.errors.NoRepositoryPresent:
        shared_dir.create_repository(shared=True)

    if not is_local:
        clone_path = os.path.join(dirname, 'clone')
        if not os.path.exists(clone_path):
            os.mkdir(clone_path)

    def register(name, branch):
        if not is_local:
            peers[name] = branch
            branches[name] = get_remote_branch(origin, branch, name)
        else:
            branches[name] = branch

    try:
        repo = origin.open_repository()
    except bzrlib.errors.NoRepositoryPresent:
        # branch
        branch = origin.open_branch()
        register('master', branch)
        return branch.repository
    else:
        # repository
        wanted = get_config('remote-bzr.branches').rstrip().split(', ')
        # ''.split(', ') gives [''], so drop empty entries
        wanted = [e for e in wanted if e]

        for name, branch in find_branches(repo, wanted):
            register(name, branch)

        return repo
def fix_path(alias, orig_url):
    """Rewrite remote.<alias>.url to an absolute path if it is relative.

    Nothing is done for URLs with a scheme other than 'file' or with an
    absolute path.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    if parsed.scheme == 'file' and not os.path.isabs(parsed.path):
        abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url)
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url])
def main(args):
    """Entry point: set up the module state, then serve commands from stdin.

    args[1] is the remote alias and args[2] the bzr URL.
    """
    global marks, prefix, gitdir, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp
    global branches, peers

    alias, url = args[1], args[2]

    tags, filenodes, blob_marks = {}, {}, {}
    parsed_refs, files_cache = {}, {}
    marks = None
    branches, peers = {}, {}

    # when the alias is just the URL behind a 5-character prefix, the state
    # is treated as temporary and keyed by a hash of the alias
    is_tmp = (alias[5:] == url)
    if is_tmp:
        alias = hashlib.sha1(alias).hexdigest()

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    handlers = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for word, handler in handlers:
            if parser.check(word):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()
def bye():
    """Exit hook: save the marks, or delete the state directory if temporary.

    Does nothing while no marks object has been created.
    """
    if not marks:
        return
    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Have bye() run when the interpreter exits, then pass the command line to
# main() and exit with whatever it returns.
atexit.register(bye)
sys.exit(main(sys.argv))