remote-bzr: use branch variable when appropriate
[alt-git.git] / contrib / remote-helpers / git-remote-bzr
blob0c67236487c86a9ddd9f2809f19421d65191011f
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
17 import sys
import bzrlib
# initialize() is only called when this bzrlib provides it.
if hasattr(bzrlib, "initialize"):
    bzrlib.initialize()

import bzrlib.plugin
# Load bzr plugins before any branch is opened further down in this script.
bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
31 import sys
32 import os
33 import json
34 import re
35 import StringIO
36 import atexit, shutil, hashlib, urlparse, subprocess
# A bare user name: everything up to the first '<' or '>'.
NAME_RE = re.compile('^([^<>]+)')
# 'Name <email>' where the name is optional; groups are (name, email).
AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$')
# An ident line '<keyword> <name> <<email>> <timestamp> <+/-offset>';
# groups are (keyword, name, email, timestamp, offset).
RAW_AUTHOR_RE = re.compile('^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <message>' to stderr and exit with status 1.

    msg is a %-style format string interpolated with args.  When no args
    are given the message is written verbatim, so an already formatted
    message that happens to contain '%' (a path, an input line) is reported
    instead of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <message>' to stderr.

    msg is a %-style format string interpolated with args.  When no args
    are given the message is written verbatim, so a literal '%' in an
    already formatted message cannot raise a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gittz(tz):
    """Format a UTC offset given in seconds as a '+HHMM' / '-HHMM' string.

    The sign is handled separately from the magnitude: flooring a negative
    offset directly would turn -12600 seconds (-03:30) into '-0430'.
    """
    sign = '-' if tz < 0 else '+'
    hours, rest = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, rest // 60)
class Marks:
    """Persistent table of revision ids, marks and branch tips.

    State is kept in three members and saved as JSON at `path`:
      tips      -- branch name -> last exported revision id
      marks     -- revision id -> mark number
      last_mark -- highest mark number handed out so far
    rev_marks is the reverse of marks (mark -> revision id); it is not
    stored but rebuilt on load.
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the saved state from self.path; do nothing if it is missing."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the
        # garbage collector.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state that gets serialized by store()."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the current state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of revision id rev; raises KeyError if unknown."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id of mark; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a new mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a new mark for revision id rev and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse table in sync, like new_mark() does, so to_rev()
        # works for this mark without a store()/load() round trip.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Return True if revision id rev already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen mark for rev and make it the last mark."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of branch, or None when there is none."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record tip as the tip revision id of branch."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    self.line always holds the current, whitespace-stripped line; the
    helper methods inspect or decode that line.  self.repo is the object
    handed in by the caller (main passes the bzr branch).
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and strip surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with word."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, advancing after each one, until separator is reached."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # Iterating a parser walks the lines up to the next empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is turned into None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer that follows the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line, otherwise
        exactly <size> characters read from stdin.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Decode an ident line into (committer, timestamp, offset-in-seconds).

        Returns None when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        # The offset is written as [+-]HHMM.  Split hours and minutes from
        # the absolute value and re-apply the sign: flooring a negative
        # value directly would turn -0330 into -10200 instead of -12600.
        offset = int(tz)
        sign = -1 if offset < 0 else 1
        hours, minutes = divmod(abs(offset), 100)
        tz = sign * (hours * 3600 + minutes * 60)
        return (committer, int(date), tz)
def rev_to_mark(rev):
    """Look up the mark recorded for revision id rev in the global marks."""
    # Read-only access to the module-level table needs no 'global' statement.
    return marks.from_rev(rev)
def mark_to_rev(mark):
    """Look up the revision id recorded for mark in the global marks."""
    # Read-only access to the module-level table needs no 'global' statement.
    return marks.to_rev(mark)
def fixup_user(user):
    """Normalize a user string into 'Name <email>' form.

    Double quotes are dropped.  A string matching AUTHOR_RE provides both
    parts; otherwise NAME_RE is tried to get at least a name.  Whatever is
    still missing is replaced by a placeholder ('unknown' for the name,
    'Unknown' for the email) rather than the literal text 'None'.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    if not mail:
        mail = 'Unknown'

    return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
    """Summarize what changed between the trees prev and cur.

    Returns (modified, removed): modified maps UTF-8 encoded paths to file
    ids, removed maps UTF-8 encoded paths to None.  A rename is reported as
    a removal of the old path plus a modification of the new one; for a
    renamed directory every non-directory entry listed below the new path
    is reported as modified.
    """
    def encode(text):
        return text.encode('utf-8')

    delta = cur.changes_from(prev)

    modified = {}
    removed = {}

    for path, fid, kind in delta.added:
        modified[encode(path)] = fid
    for path, fid, kind in delta.removed:
        removed[encode(path)] = None
    for path, fid, kind, mod, _ in delta.modified:
        modified[encode(path)] = fid
    for oldpath, newpath, fid, kind, mod, _ in delta.renamed:
        removed[encode(oldpath)] = None
        if kind != 'directory':
            modified[encode(newpath)] = fid
            continue
        children = cur.list_files(from_dir=newpath, recursive=True)
        for child, file_class, child_kind, child_fid, entry in children:
            if child_kind != 'directory':
                modified[encode(newpath + '/' + child)] = child_fid

    return modified, removed
210 def export_files(tree, files):
211 global marks, filenodes
213 final = []
214 for path, fid in files.iteritems():
215 kind = tree.kind(fid)
217 h = tree.get_file_sha1(fid)
219 if kind == 'symlink':
220 d = tree.get_symlink_target(fid)
221 mode = '120000'
222 elif kind == 'file':
224 if tree.is_executable(fid):
225 mode = '100755'
226 else:
227 mode = '100644'
229 # is the blob already exported?
230 if h in filenodes:
231 mark = filenodes[h]
232 final.append((mode, mark, path))
233 continue
235 d = tree.get_file_text(fid)
236 elif kind == 'directory':
237 continue
238 else:
239 die("Unhandled kind '%s' for path '%s'" % (kind, path))
241 mark = marks.next_mark()
242 filenodes[h] = mark
244 print "blob"
245 print "mark :%u" % mark
246 print "data %d" % len(d)
247 print d
249 final.append((mode, mark, path))
251 return final
253 def export_branch(branch, name):
254 global prefix
256 ref = '%s/heads/%s' % (prefix, name)
257 tip = marks.get_tip(name)
259 repo = branch.repository
261 branch.lock_read()
262 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
263 count = 0
265 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
267 for revid in revs:
269 rev = repo.get_revision(revid)
271 parents = rev.parent_ids
272 time = rev.timestamp
273 tz = rev.timezone
274 committer = rev.committer.encode('utf-8')
275 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
276 authors = rev.get_apparent_authors()
277 if authors:
278 author = authors[0].encode('utf-8')
279 author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
280 else:
281 author = committer
282 msg = rev.message.encode('utf-8')
284 msg += '\n'
286 if len(parents) == 0:
287 parent = bzrlib.revision.NULL_REVISION
288 else:
289 parent = parents[0]
291 cur_tree = repo.revision_tree(revid)
292 prev = repo.revision_tree(parent)
293 modified, removed = get_filechanges(cur_tree, prev)
295 modified_final = export_files(cur_tree, modified)
297 if len(parents) == 0:
298 print 'reset %s' % ref
300 print "commit %s" % ref
301 print "mark :%d" % (marks.get_mark(revid))
302 print "author %s" % (author)
303 print "committer %s" % (committer)
304 print "data %d" % (len(msg))
305 print msg
307 for i, p in enumerate(parents):
308 try:
309 m = rev_to_mark(p)
310 except KeyError:
311 # ghost?
312 continue
313 if i == 0:
314 print "from :%s" % m
315 else:
316 print "merge :%s" % m
318 for f in removed:
319 print "D %s" % (f,)
320 for f in modified_final:
321 print "M %s :%u %s" % f
322 print
324 count += 1
325 if (count % 100 == 0):
326 print "progress revision %s (%d/%d)" % (revid, count, len(revs))
327 print "#############################################################"
329 branch.unlock()
331 revid = branch.last_revision()
333 # make sure the ref is updated
334 print "reset %s" % ref
335 print "from :%u" % rev_to_mark(revid)
336 print
338 marks.set_tip(name, revid)
340 def export_tag(repo, name):
341 global tags, prefix
343 ref = '%s/tags/%s' % (prefix, name)
344 print "reset %s" % ref
345 print "from :%u" % rev_to_mark(tags[name])
346 print
348 def do_import(parser):
349 global dirname
351 branch = parser.repo
352 path = os.path.join(dirname, 'marks-git')
354 print "feature done"
355 if os.path.exists(path):
356 print "feature import-marks=%s" % path
357 print "feature export-marks=%s" % path
358 print "feature force"
359 sys.stdout.flush()
361 while parser.check('import'):
362 ref = parser[1]
363 if ref.startswith('refs/heads/'):
364 name = ref[len('refs/heads/'):]
365 export_branch(branch, name)
366 if ref.startswith('refs/tags/'):
367 name = ref[len('refs/tags/'):]
368 export_tag(branch, name)
369 parser.next()
371 print 'done'
373 sys.stdout.flush()
def parse_blob(parser):
    """Consume one blob command and remember its data under its mark.

    The line after the current one is read as the mark, the line after that
    as the data header; the payload is stored in the global blob_marks.
    """
    parser.next()
    blob_mark = parser.get_mark()
    parser.next()
    blob_marks[blob_mark] = parser.get_data()
    parser.next()
class CustomTree():
    """In-memory description of one incoming commit for the commit builder.

    The tree starts from the file table of the first parent (path ->
    [file_id, mark]) and layers the commit's own file commands on top:
      files      -- path -> [file_id, mark] for the resulting tree
      rev_files  -- file_id -> [path, mark]
      updates    -- file_id -> file command dict ('mode'/'mark' or 'deleted')
    File tables are shared between commits through the global files_cache.
    """

    def __init__(self, branch, revid, parents, files):
        global files_cache

        self.updates = {}
        self.branch = branch

        def copy_tree(revid):
            # Build the table for revid from the repository.  The lock is
            # released even if reading the tree fails, and the cache only
            # receives a completely built table.
            files = {}
            branch.lock_read()
            try:
                tree = branch.repository.revision_tree(revid)
                for path, entry in tree.iter_entries_by_dir():
                    files[path] = [entry.file_id, None]
            finally:
                branch.unlock()
            files_cache[revid] = files
            return files

        if len(parents) == 0:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}
        else:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            # Only a cache miss triggers a read; a cached table is used
            # as-is even if it happens to be empty.
            if self.base_files is None:
                self.base_files = copy_tree(self.base_id)

        self.files = files_cache[revid] = self.base_files.copy()
        self.rev_files = {}

        for path, data in self.files.iteritems():
            fid, mark = data
            self.rev_files[fid] = [path, mark]

        for path, f in files.iteritems():
            fid, mark = self.files.get(path, [None, None])
            if not fid:
                fid = bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.rev_files[fid] = [path, mark]
            self.updates[fid] = f

    def last_revision(self):
        """Return the revision id this tree is based on (the first parent)."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for this commit's file commands.

        The tuples are handed to builder.record_iter_changes() by
        parse_commit.  NOTE(review): the field order looks like
        (file_id, paths, changed_content, versioned, parent ids, names,
        kinds, executable flags) -- confirm against the bzrlib API.
        """
        changes = []

        def get_parent(dirname, basename):
            # Return the file id of directory dirname, creating an 'added
            # directory' entry for it when neither the base nor the current
            # table knows it yet.
            parent_fid, mark = self.base_files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            parent_fid, mark = self.files.get(dirname, [None, None])
            if parent_fid:
                return parent_fid
            if basename == '':
                return None
            # Derive the id from the directory's own path, not from the
            # path of whichever file is currently being processed.
            fid = bzrlib.generate_ids.gen_file_id(dirname)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = False
            if mode == '100755':
                executable = True
            elif mode == '120000':
                kind = 'symlink'

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = [change[0], None]
            changes.append(change)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
                continue

            if path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

            # add_entry/update_entry reset the mark slot; record the blob
            # mark of the new content in both tables.
            self.files[path][1] = f['mark']
            self.rev_files[fid][1] = f['mark']

        return changes

    def get_content(self, file_id):
        """Return the content of file_id.

        Content comes from the blob received for the file's mark when there
        is one, otherwise from the base revision's tree in the repository.
        """
        path, mark = self.rev_files[file_id]
        if mark:
            return blob_marks[mark]

        # last resort
        tree = self.branch.repository.revision_tree(self.base_id)
        return tree.get_file_text(file_id)

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object with the content, None)."""
        content = self.get_content(file_id)
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the content of file_id, which is the link target."""
        return self.get_content(file_id)

    def id2path(self, file_id):
        """Return the path recorded for file_id."""
        path, mark = self.rev_files[file_id]
        return path
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string wrapped in double quotes has its escapes decoded and the
    quotes removed; anything else is returned unchanged.  The length check
    keeps an empty string from raising IndexError and a lone '"' from being
    treated as a quoted string.
    """
    if len(string) >= 2 and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one commit command and create the matching bzr revision.

    Only 'refs/heads/master' is accepted.  The commit's mark, committer,
    message, parents and file commands are read from the parser; the
    revision is then built on parser.repo through a commit builder.  On
    success the new revision id is recorded in parsed_refs and in marks.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The line at this position was parsed as the author before, but the
    # result was never used; only the committer goes into the revision.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (endswith also copes with an empty message, unlike data[-1])
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'mark' : mark }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument so a '%' in it cannot break
            # die()'s own formatting.
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    branch = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = branch.nick

    mtree = CustomTree(branch, revid, parents, files)
    changes = mtree.iter_changes()

    branch.lock_write()
    try:
        builder = branch.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Abort the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        branch.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume one reset command.

    Only 'refs/heads/master' is accepted.  A reset directly followed by a
    commit is handed to parse_commit; a reset followed by a 'from' line
    records the referenced revision in parsed_refs; anything else leaves
    parsed_refs untouched.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        target_mark = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(target_mark)
638 def do_export(parser):
639 global parsed_refs, dirname, peer
641 parser.next()
643 for line in parser.each_block('done'):
644 if parser.check('blob'):
645 parse_blob(parser)
646 elif parser.check('commit'):
647 parse_commit(parser)
648 elif parser.check('reset'):
649 parse_reset(parser)
650 elif parser.check('tag'):
651 pass
652 elif parser.check('feature'):
653 pass
654 else:
655 die('unhandled export command: %s' % line)
657 branch = parser.repo
659 for ref, revid in parsed_refs.iteritems():
660 if ref == 'refs/heads/master':
661 branch.generate_revision_history(revid, marks.get_tip('master'))
662 if peer:
663 try:
664 branch.push(peer, stop_revision=revid)
665 except bzrlib.errors.DivergedBranches:
666 print "error %s non-fast forward" % ref
667 continue
669 try:
670 wt = branch.bzrdir.open_workingtree()
671 wt.update()
672 except bzrlib.errors.NoWorkingTree:
673 pass
675 print "ok %s" % ref
677 print
679 def do_capabilities(parser):
680 global dirname
682 print "import"
683 print "export"
684 print "refspec refs/heads/*:%s/heads/*" % prefix
685 print "refspec refs/tags/*:%s/tags/*" % prefix
687 path = os.path.join(dirname, 'marks-git')
689 if os.path.exists(path):
690 print "*import-marks %s" % path
691 print "*export-marks %s" % path
693 print
def ref_is_valid(name):
    """Return True unless name contains '~', '^', ':', a space or a backslash."""
    return not any(c in name for c in '~^: \\')
698 def do_list(parser):
699 global tags
700 print "? refs/heads/%s" % 'master'
702 branch = parser.repo
703 branch.lock_read()
704 for tag, revid in branch.tags.get_tag_dict().items():
705 try:
706 branch.revision_id_to_dotted_revno(revid)
707 except bzrlib.errors.NoSuchRevision:
708 continue
709 if not ref_is_valid(tag):
710 continue
711 print "? refs/tags/%s" % tag
712 tags[tag] = revid
713 branch.unlock()
714 print "@refs/heads/%s HEAD" % 'master'
715 print
def get_repo(url, alias):
    """Open the branch at url and return the branch object to work on.

    A branch reached through a local transport is returned directly and the
    global peer is set to None.  Otherwise a clone under '<dirname>/clone'
    is used: it is created and bound to the remote branch on first use, or
    pulled from the remote branch afterwards, and peer is set to the remote
    branch.  If the pull fails because the branches diverged, the remote
    branch itself is returned and peer is set to None.  alias is not used.
    """
    global dirname, peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    origin_branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return origin_branch

    clone_path = os.path.join(dirname, 'clone')

    if not os.path.exists(clone_path):
        # clone
        clone_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=origin_branch)
        local_branch = clone_dir.open_branch()
        local_branch.bind(origin_branch)
    else:
        # pull
        clone_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        local_branch = clone_dir.open_branch()
        try:
            local_branch.pull(origin_branch, [], None, False)
        except bzrlib.errors.DivergedBranches:
            # use remote branch for now
            peer = None
            return origin_branch

    peer = origin_branch
    return local_branch
def fix_path(alias, orig_url):
    """Rewrite a relative file URL of remote alias into an absolute one.

    Nothing happens for URLs with a scheme other than 'file' or with an
    absolute path.  Otherwise the URL is joined onto the current working
    directory and stored via 'git config remote.<alias>.url'.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    is_relative_file = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not is_relative_file:
        return
    base = "%s/" % os.getcwd()
    abs_url = urlparse.urljoin(base, orig_url)
    subprocess.call(['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url])
def main(args):
    """Run the remote helper: args[1] is the remote alias, args[2] the URL.

    Sets up the module-level state, opens the repository and then serves
    the commands read from stdin ('capabilities', 'list', 'import',
    'export') until an empty line is read.  Per-remote state lives in
    '$GIT_DIR/bzr/<alias>'.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    alias = args[1]
    url = args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None

    # NOTE(review): an alias that is the URL plus a 5-character prefix is
    # presumably an unnamed 'bzr::<url>' remote -- confirm.  Its state is
    # kept under a hashed name and removed again at exit (see bye()).
    if alias[5:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks_path = os.path.join(dirname, 'marks-int')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the line as an argument so a '%' in it cannot break
            # die()'s own formatting.
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: save the marks, or remove the state of a temporary remote.

    Does nothing while the global marks is still unset.
    """
    if not marks:
        return
    if is_tmp:
        shutil.rmtree(dirname)
    else:
        marks.store()
# Register the exit hook first so bye() also runs when main() leaves
# through sys.exit() (for example via die()).
atexit.register(bye)
sys.exit(main(sys.argv))