remote-bzr: store converted URL
[git.git] / contrib / remote-helpers / git-remote-bzr
blob34f61565768650eea2970eac4d7e27e602ef88e8
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
17 import sys
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
30 import sys
31 import os
32 import json
33 import re
34 import StringIO
35 import atexit, shutil, hashlib, urlparse, subprocess
# Patterns used to take apart user/ident strings.
# Raw string literals keep the regex backslash classes (\w, \d) away from
# Python's own string-escape processing; the compiled patterns are unchanged.

# Leading run of characters that are neither '<' nor '>'.
NAME_RE = re.compile(r'^([^<>]+)')
# Optional name followed by '<mail>' spanning the whole string.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# '<word> <name> <<mail>> <digits> <+/-digits>' as matched by
# Parser.get_author() against the current input line.
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an error message to stderr and exit with status 1.

    ``msg`` is a %-style format string that is interpolated with ``args``.
    When no ``args`` are given the message is written verbatim, so a caller
    passing an already formatted message that happens to contain a '%'
    (for example from a path) does not trigger a formatting error here.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
def warn(msg, *args):
    """Write a warning message to stderr.

    ``msg`` is a %-style format string that is interpolated with ``args``.
    When no ``args`` are given the message is written verbatim, so an
    already formatted message containing '%' is reported instead of raising
    a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
def gittz(tz):
    """Format a UTC offset in seconds as a git timezone string ('+HHMM').

    The sign is handled separately from the magnitude: applying floor
    division and modulo directly to a negative offset that is not a whole
    number of hours would yield the wrong fields (e.g. -12600 would be
    rendered as '-0430' instead of '-0330').
    """
    sign = '+'
    if tz < 0:
        sign = '-'
        tz = -tz
    hours, seconds = divmod(int(tz), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
class Marks:
    """Revision-id <-> mark bookkeeping, persisted as JSON at ``path``.

    Attributes:
      path      -- file the state is loaded from and stored to
      tips      -- branch name -> last exported revision id
      marks     -- revision id -> mark
      rev_marks -- mark -> revision id (filled by load() and new_mark())
      last_mark -- highest mark handed out so far
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the stored state, if the marks file exists."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of leaving it to the
        # garbage collector.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Rebuild the reverse mapping from the stored forward mapping.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the state in the shape that is written to disk."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the current state to ``self.path`` as JSON."""
        # The with-block guarantees the data is flushed and the handle
        # closed before returning.
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of ``rev``; raises KeyError when unknown."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id of ``mark``; raises KeyError when unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark, record it for ``rev`` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether ``rev`` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen ``mark`` for ``rev`` in both maps."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of ``branch``, or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record ``tip`` as the tip of ``branch``."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    ``self.line`` always holds the current (stripped) input line and
    ``self.repo`` the object handed to the constructor.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin with surrounding whitespace removed."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with ``word``."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines until the current line equals ``separator``.

        A fresh line is read after every yield, i.e. after the consumer
        has finished handling the yielded one.
        """
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a 'data' line.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    @staticmethod
    def _tz_to_seconds(tz):
        """Convert a '+HHMM' / '-HHMM' offset into signed seconds.

        The sign is applied after splitting the magnitude into hours and
        minutes; floor division and modulo on the negative number itself
        would turn e.g. '-0330' into -2h50m instead of -3h30m.
        """
        value = int(tz)
        sign = 1
        if value < 0:
            sign = -1
            value = -value
        hours, minutes = divmod(value, 100)
        return sign * (hours * 3600 + minutes * 60)

    def get_author(self):
        """Parse the current line with RAW_AUTHOR_RE.

        Returns a ('name <email>', timestamp, offset-in-seconds) tuple, or
        None when the line does not match.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        return (committer, int(date), self._tz_to_seconds(tz))
def rev_to_mark(rev):
    """Look up the mark recorded for revision id ``rev`` in the global marks."""
    # Only read access is needed, so no ``global`` declaration is required.
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Look up the revision id recorded for ``mark`` in the global marks."""
    # Only read access is needed, so no ``global`` declaration is required.
    rev = marks.to_rev(mark)
    return rev
def fixup_user(user):
    """Normalise a user string into the 'name <mail>' form.

    Double quotes are dropped first.  A string of the form matched by
    AUTHOR_RE supplies both parts; otherwise NAME_RE is tried to obtain at
    least a name.  A part that could not be determined is replaced by the
    placeholder 'unknown' rather than being rendered as the text 'None'.
    An explicitly empty mail ('<>') is kept as it is.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    # '%s' % None would put the literal word "None" into the identity.
    if not name:
        name = 'unknown'
    if mail is None:
        mail = 'unknown'

    return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
    """Summarise the differences between two trees.

    Returns a ``(modified, removed)`` pair of dicts keyed by UTF-8 encoded
    path: ``modified`` maps to the file id, ``removed`` maps to None.  The
    groups reported by ``cur.changes_from(prev)`` are processed in the
    order added, removed, modified, renamed.
    """
    delta = cur.changes_from(prev)

    def utf8(text):
        return text.encode('utf-8')

    modified = {}
    removed = {}

    for path, fid, _kind in delta.added:
        modified[utf8(path)] = fid

    for path, _fid, _kind in delta.removed:
        removed[utf8(path)] = None

    for path, fid, _kind, _mod, _ in delta.modified:
        modified[utf8(path)] = fid

    for old_path, new_path, fid, kind, _mod, _ in delta.renamed:
        # A rename is reported as removal of the old path ...
        removed[utf8(old_path)] = None

        if kind != 'directory':
            # ... plus the entry itself at its new path,
            modified[utf8(new_path)] = fid
            continue

        # ... or, for a directory, every non-directory entry below it.
        listing = cur.list_files(from_dir=new_path, recursive=True)
        for sub_path, _file_class, sub_kind, sub_fid, _entry in listing:
            if sub_kind == 'directory':
                continue
            modified[utf8(new_path + '/' + sub_path)] = sub_fid

    return modified, removed
209 def export_files(tree, files):
210 global marks, filenodes
212 final = []
213 for path, fid in files.iteritems():
214 kind = tree.kind(fid)
216 h = tree.get_file_sha1(fid)
218 if kind == 'symlink':
219 d = tree.get_symlink_target(fid)
220 mode = '120000'
221 elif kind == 'file':
223 if tree.is_executable(fid):
224 mode = '100755'
225 else:
226 mode = '100644'
228 # is the blob already exported?
229 if h in filenodes:
230 mark = filenodes[h]
231 final.append((mode, mark, path))
232 continue
234 d = tree.get_file_text(fid)
235 elif kind == 'directory':
236 continue
237 else:
238 die("Unhandled kind '%s' for path '%s'" % (kind, path))
240 mark = marks.next_mark()
241 filenodes[h] = mark
243 print "blob"
244 print "mark :%u" % mark
245 print "data %d" % len(d)
246 print d
248 final.append((mode, mark, path))
250 return final
252 def export_branch(branch, name):
253 global prefix
255 ref = '%s/heads/%s' % (prefix, name)
256 tip = marks.get_tip(name)
258 repo = branch.repository
259 repo.lock_read()
260 revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
261 count = 0
263 revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]
265 for revid in revs:
267 rev = repo.get_revision(revid)
269 parents = rev.parent_ids
270 time = rev.timestamp
271 tz = rev.timezone
272 committer = rev.committer.encode('utf-8')
273 committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
274 authors = rev.get_apparent_authors()
275 if authors:
276 author = authors[0].encode('utf-8')
277 author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
278 else:
279 author = committer
280 msg = rev.message.encode('utf-8')
282 msg += '\n'
284 if len(parents) == 0:
285 parent = bzrlib.revision.NULL_REVISION
286 else:
287 parent = parents[0]
289 cur_tree = repo.revision_tree(revid)
290 prev = repo.revision_tree(parent)
291 modified, removed = get_filechanges(cur_tree, prev)
293 modified_final = export_files(cur_tree, modified)
295 if len(parents) == 0:
296 print 'reset %s' % ref
298 print "commit %s" % ref
299 print "mark :%d" % (marks.get_mark(revid))
300 print "author %s" % (author)
301 print "committer %s" % (committer)
302 print "data %d" % (len(msg))
303 print msg
305 for i, p in enumerate(parents):
306 try:
307 m = rev_to_mark(p)
308 except KeyError:
309 # ghost?
310 continue
311 if i == 0:
312 print "from :%s" % m
313 else:
314 print "merge :%s" % m
316 for f in removed:
317 print "D %s" % (f,)
318 for f in modified_final:
319 print "M %s :%u %s" % f
320 print
322 count += 1
323 if (count % 100 == 0):
324 print "progress revision %s (%d/%d)" % (revid, count, len(revs))
325 print "#############################################################"
327 repo.unlock()
329 revid = branch.last_revision()
331 # make sure the ref is updated
332 print "reset %s" % ref
333 print "from :%u" % rev_to_mark(revid)
334 print
336 marks.set_tip(name, revid)
338 def export_tag(repo, name):
339 global tags, prefix
341 ref = '%s/tags/%s' % (prefix, name)
342 print "reset %s" % ref
343 print "from :%u" % rev_to_mark(tags[name])
344 print
346 def do_import(parser):
347 global dirname
349 branch = parser.repo
350 path = os.path.join(dirname, 'marks-git')
352 print "feature done"
353 if os.path.exists(path):
354 print "feature import-marks=%s" % path
355 print "feature export-marks=%s" % path
356 sys.stdout.flush()
358 while parser.check('import'):
359 ref = parser[1]
360 if ref.startswith('refs/heads/'):
361 name = ref[len('refs/heads/'):]
362 export_branch(branch, name)
363 if ref.startswith('refs/tags/'):
364 name = ref[len('refs/tags/'):]
365 export_tag(branch, name)
366 parser.next()
368 print 'done'
370 sys.stdout.flush()
def parse_blob(parser):
    """Consume a 'blob' command and remember its data under its mark.

    The payload is stored in the global ``blob_marks`` dict; the parser is
    left on the line following the data.
    """
    parser.next()               # 'blob' -> mark line
    blob_mark = parser.get_mark()
    parser.next()               # mark line -> data line
    blob_marks[blob_mark] = parser.get_data()
    parser.next()               # move past the data
class CustomTree():
    """In-memory tree describing one commit that is about to be recorded.

    It is built from the first parent's path -> file id map plus the file
    updates of the commit, and is handed to the commit builder together
    with the list returned by iter_changes().
    """

    def __init__(self, repo, revid, parents, files):
        """
        repo    -- object providing ``repository``, ``lock_read`` and ``unlock``
        revid   -- id of the revision being created
        parents -- list of parent revision ids (the first one is the base)
        files   -- path -> update dict; each dict gains a 'path' key here
        """
        global files_cache

        self.repo = repo
        self.revid = revid
        self.parents = parents
        self.updates = {}

        def snapshot(base_revid):
            # Build (and cache) the path -> file id map of a stored revision.
            listing = files_cache[base_revid] = {}
            tree = repo.repository.revision_tree(base_revid)
            repo.lock_read()
            try:
                for entry_path, entry in tree.iter_entries_by_dir():
                    listing[entry_path] = entry.file_id
            finally:
                repo.unlock()
            return listing

        if parents:
            self.base_id = parents[0]
            cached = files_cache.get(self.base_id, None)
            # A missing *or empty* cache entry is rebuilt from the repository.
            if cached:
                self.base_files = cached
            else:
                self.base_files = snapshot(self.base_id)
        else:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}

        # Start from a private copy of the base; it is also the cache entry
        # for the revision being created.
        self.files = files_cache[revid] = self.base_files.copy()

        for path, f in files.iteritems():
            # Reuse the id of a known path, otherwise mint a new one.
            fid = self.files.get(path, None) or bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.updates[fid] = f

    def last_revision(self):
        """Return the id of the revision this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples for the pending updates.

        NOTE(review): the 8-tuples are presumably in the layout expected by
        bzrlib's record_iter_changes -- confirm against the bzrlib API.
        """
        changes = []

        def resolve(kind, mode):
            # Map the file mode onto the (kind, executable) pair.
            if mode == '100755':
                return kind, True
            if mode == '120000':
                return 'symlink', False
            return kind, False

        def get_parent(dirname, basename):
            # Prefer an id already known in the base, then in the new tree.
            for known in (self.base_files, self.files):
                parent_fid = known.get(dirname, None)
                if parent_fid:
                    return parent_fid
            if basename == '':
                return None
            # The directory is not known yet: create it on the fly.
            # NOTE: ``path`` below is the variable of the enclosing
            # iter_changes() loop (the file currently being processed),
            # not ``dirname``.
            fid = bzrlib.generate_ids.gen_file_id(path)
            add_entry(fid, dirname, 'directory')
            return fid

        def add_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            kind, executable = resolve(kind, mode)

            change = (fid,
                    (None, path),
                    True,
                    (False, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def update_entry(fid, path, kind, mode = None):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            kind, executable = resolve(kind, mode)

            change = (fid,
                    (path, path),
                    True,
                    (True, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)
            return change

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
            elif path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object over the update's data, None)."""
        content = self.updates[file_id]['data']
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the update's data, which for a symlink is its target."""
        return self.updates[file_id]['data']
def c_style_unescape(string):
    """Undo C-style quoting of ``string``.

    A string that starts and ends with a double quote is unescaped and
    returned without the surrounding quotes; anything else, including the
    empty string, is returned unchanged.
    """
    # The emptiness test avoids an IndexError on string[0].
    if string and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one 'commit' command from the stream and record it in bzr.

    Only 'refs/heads/master' is accepted.  The file commands ('M' and 'D')
    are collected into a CustomTree, which is committed through the
    repository's commit builder.  On success the new revision id is stored
    in ``parsed_refs`` for the ref and registered under the commit's mark.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The 'author' line is skipped: only the committer is recorded.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only, so that a path containing spaces survives
            # (the 'M' branch above limits its split the same way).
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Pass the line as an argument: a pre-formatted message would
            # be %-interpolated again inside die() and fail on a '%'.
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Roll the half-built commit back, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command.

    Only 'refs/heads/master' is accepted.  A reset directly followed by a
    commit is delegated to parse_commit(); a reset with a 'from' line
    records the referenced revision for the ref in ``parsed_refs``; any
    other reset is ignored.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        target_mark = parser.get_mark()
        parser.next()
        # Same lookup mark_to_rev() performs.
        parsed_refs[ref] = marks.to_rev(target_mark)
613 def do_export(parser):
614 global parsed_refs, dirname, peer
616 parser.next()
618 for line in parser.each_block('done'):
619 if parser.check('blob'):
620 parse_blob(parser)
621 elif parser.check('commit'):
622 parse_commit(parser)
623 elif parser.check('reset'):
624 parse_reset(parser)
625 elif parser.check('tag'):
626 pass
627 elif parser.check('feature'):
628 pass
629 else:
630 die('unhandled export command: %s' % line)
632 repo = parser.repo
634 for ref, revid in parsed_refs.iteritems():
635 if ref == 'refs/heads/master':
636 repo.generate_revision_history(revid, marks.get_tip('master'))
637 if peer:
638 try:
639 repo.push(peer, stop_revision=revid)
640 except bzrlib.errors.DivergedBranches:
641 print "error %s non-fast forward" % ref
642 continue
643 else:
644 wt = repo.bzrdir.open_workingtree()
645 wt.update()
646 print "ok %s" % ref
648 print
650 def do_capabilities(parser):
651 global dirname
653 print "import"
654 print "export"
655 print "refspec refs/heads/*:%s/heads/*" % prefix
656 print "refspec refs/tags/*:%s/tags/*" % prefix
658 path = os.path.join(dirname, 'marks-git')
660 if os.path.exists(path):
661 print "*import-marks %s" % path
662 print "*export-marks %s" % path
664 print
def ref_is_valid(name):
    """Tell whether ``name`` is free of the characters '~', '^', ':', ' ' and '\\'."""
    forbidden = '~^: \\'
    return not any(char in name for char in forbidden)
669 def do_list(parser):
670 global tags
671 print "? refs/heads/%s" % 'master'
673 branch = parser.repo
674 branch.lock_read()
675 for tag, revid in branch.tags.get_tag_dict().items():
676 try:
677 branch.revision_id_to_dotted_revno(revid)
678 except bzrlib.errors.NoSuchRevision:
679 continue
680 if not ref_is_valid(tag):
681 continue
682 print "? refs/tags/%s" % tag
683 tags[tag] = revid
684 branch.unlock()
685 print "@refs/heads/%s HEAD" % 'master'
686 print
def get_repo(url, alias):
    """Open the branch at ``url`` and return the branch to work on.

    For a local transport the opened branch itself is returned and the
    global ``peer`` is set to None.  Otherwise a clone under
    ``<dirname>/clone`` is used: it is pulled when it already exists, or
    created and bound to the remote branch when it does not, and ``peer``
    is set to the remote branch.  ``alias`` is not used.
    """
    global dirname, peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return branch

    clone_path = os.path.join(dirname, 'clone')
    remote_branch = branch

    if not os.path.exists(clone_path):
        # clone
        clone_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = clone_dir.open_branch()
        branch.bind(remote_branch)
    else:
        # pull
        clone_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        branch = clone_dir.open_branch()
        branch.pull(remote_branch, [], None, False)

    peer = remote_branch
    return branch
def fix_path(alias, orig_url):
    """Store an absolute URL in the git config for a relative local path.

    Nothing happens unless ``orig_url`` parses as a 'file' URL with a
    relative path; in that case ``remote.<alias>.url`` is set to the
    'bzr::'-prefixed URL joined onto the current working directory.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    relative_local = parsed.scheme == 'file' and not os.path.isabs(parsed.path)
    if not relative_local:
        return

    base = "%s/" % os.getcwd()
    abs_url = urlparse.urljoin(base, orig_url)
    config_key = 'remote.%s.url' % alias
    subprocess.call(['git', 'config', config_key, "bzr::%s" % abs_url])
def main(args):
    """Entry point of the remote helper.

    ``args[1]`` is the remote alias and ``args[2]`` the URL.  Sets up the
    module-level state, opens the repository and then serves the commands
    arriving on stdin (capabilities, list, import, export) until an empty
    line is read.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    alias = args[1]
    url = args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None

    # When the alias (minus its first five characters) is the URL itself,
    # the state is treated as temporary and keyed by a hash of the alias.
    if alias[5:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    repo = get_repo(url, alias)

    marks_path = os.path.join(dirname, 'marks-int')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Pass the line as an argument: a pre-formatted message would
            # be %-interpolated again inside die() and fail on a '%'.
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: persist the marks, or drop the temporary state directory.

    Does nothing when no marks object exists, which includes the case of
    main() failing before it got as far as assigning the ``marks`` global.
    """
    # Look the global up defensively: a plain reference would raise a
    # NameError inside the exit hook when main() never assigned it.
    current = globals().get('marks')
    if not current:
        return
    if not is_tmp:
        current.store()
    else:
        shutil.rmtree(dirname)
# Only act when executed as a script, so that importing the module has no
# side effects.
if __name__ == '__main__':
    # Run bye() however the helper terminates.
    atexit.register(bye)
    sys.exit(main(sys.argv))