remote-bzr: strip extra newline
[git.git] / contrib / remote-helpers / git-remote-bzr
blobc19ed0e26b7d072eb8afffa36d2d707bc4cff59d
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
17 import sys
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
31 import sys
32 import os
33 import json
34 import re
35 import StringIO
36 import atexit, shutil, hashlib, urlparse, subprocess
# A bare user name without any e-mail part, e.g. "John Doe".
NAME_RE = re.compile(r'^([^<>]+)')
# An optional name followed by an e-mail in angle brackets,
# e.g. "John Doe <john@example.com>" or just "<john@example.com>".
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# An ident line as read from the stream:
# "<command> <name> <<email>> <timestamp> <+-HHMM>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write an 'ERROR: ...' line to stderr and exit with status 1.

    msg is %-formatted with args only when args are supplied.  A message
    that the caller has already formatted (and that may therefore contain
    a literal '%') is printed verbatim instead of raising a formatting
    error before the real problem can be reported.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % (msg,))
    sys.exit(1)
def warn(msg, *args):
    """Write a 'WARNING: ...' line to stderr without terminating.

    msg is %-formatted with args only when args are supplied, so an
    already formatted message containing a literal '%' is printed as-is.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % (msg,))
def gittz(tz):
    """Format a UTC offset given in seconds as a '+HHMM' / '-HHMM' string.

    The sign is handled separately from the magnitude: floor division and
    modulo on a negative offset round towards minus infinity, which turned
    e.g. -12600 seconds (-03:30) into '-0430'.
    """
    sign = '-' if tz < 0 else '+'
    hours, seconds = divmod(abs(tz), 3600)
    return '%s%02d%02d' % (sign, hours, seconds // 60)
class Marks:
    """Mapping between revision ids and numeric marks, persisted as JSON.

    Attributes:
      path      -- file the state is loaded from and stored to
      tips      -- branch name -> last revision id recorded with set_tip()
      marks     -- revision id -> mark
      rev_marks -- mark -> revision id (reverse of `marks`, not persisted)
      last_mark -- highest mark handed out so far
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}
        self.marks = {}
        self.rev_marks = {}
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the state from self.path; a missing file leaves it empty."""
        if not os.path.exists(self.path):
            return

        # Close the file deterministically instead of relying on the GC.
        with open(self.path) as f:
            tmp = json.load(f)
        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # The reverse mapping is not stored; rebuild it from `marks`.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the persistent part of the state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of revision `rev`; raises KeyError if unmarked."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id for `mark`; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark not tied to any revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark, associate it with `rev` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the reverse mapping in sync so to_rev() can resolve this mark
        # right away, not only after the next load().
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Tell whether `rev` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev` and make it the last mark."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of `branch`, or None when there is none."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record `tip` as the tip of `branch`."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader for the command stream arriving on stdin.

    `line` always holds the current, whitespace-stripped input line; the
    helper methods interpret that line.  `repo` is stored untouched for
    the command handlers.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and strip surrounding whitespace."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Tell whether the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, reading a new one after each, until `separator`."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block of commands is terminated by an empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' of the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a 'data' line, otherwise
        exactly <size> characters read from stdin.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an ident line into ('Name <email>', timestamp, tz_seconds).

        Returns None when the current line does not match RAW_AUTHOR_RE.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        # Convert +-HHMM to seconds.  Split the magnitude, then re-apply the
        # sign: floor division/modulo on a negative value would turn -0330
        # into -4h +70min instead of -3h30min.
        offset = int(tz)
        hours, minutes = divmod(abs(offset), 100)
        tz = hours * 3600 + minutes * 60
        if offset < 0:
            tz = -tz
        return (committer, int(date), tz)
def rev_to_mark(rev):
    """Return the mark recorded for revision id `rev` in the global marks.

    Propagates the KeyError raised by Marks.from_rev() for unknown ids.
    """
    mark = marks.from_rev(rev)
    return mark
def mark_to_rev(mark):
    """Return the revision id recorded for `mark` in the global marks.

    Propagates the KeyError raised by Marks.to_rev() for unknown marks.
    """
    rev = marks.to_rev(mark)
    return rev
def fixup_user(user):
    """Normalise a user string to the 'Name <email>' form.

    Double quotes are dropped.  The string is first parsed as
    'Name <email>' (AUTHOR_RE); failing that, as a bare name (NAME_RE).
    A part that cannot be determined is replaced by 'unknown' instead of
    leaking the literal text 'None' into the ident.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    if not name:
        name = 'unknown'
    # An explicitly empty address ("<>") is kept; only a missing one is filled.
    if mail is None:
        mail = 'unknown'

    return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
    """Summarise the differences between tree `cur` and tree `prev`.

    Returns a pair (modified, removed) of dictionaries keyed by UTF-8
    encoded path: `modified` maps each added, modified or renamed-to path
    to its file id, `removed` maps each removed or renamed-from path to
    None.  For a renamed directory every non-directory entry listed below
    its new location is reported as modified.
    """
    delta = cur.changes_from(prev)
    modified, removed = {}, {}

    def encode(text):
        return text.encode('utf-8')

    for path, fid, _kind in delta.added:
        modified[encode(path)] = fid

    for path, _fid, _kind in delta.removed:
        removed[encode(path)] = None

    for path, fid, _kind, _mod, _ in delta.modified:
        modified[encode(path)] = fid

    for oldpath, newpath, fid, kind, _mod, _ in delta.renamed:
        removed[encode(oldpath)] = None
        if kind != 'directory':
            modified[encode(newpath)] = fid
            continue
        # A directory rename moves everything underneath it.
        children = cur.list_files(from_dir=newpath, recursive=True)
        for child, _file_class, child_kind, child_fid, _entry in children:
            if child_kind != 'directory':
                modified[encode(newpath + '/' + child)] = child_fid

    return modified, removed
def export_files(tree, files):
    """Emit 'blob' commands for `files` and return their (mode, mark, path).

    `files` maps path -> file id within `tree`.  Directories are skipped.
    A regular file whose SHA-1 is already in the global `filenodes` cache
    is not emitted again; its existing mark is reused.  Any other kind of
    entry is fatal.
    """
    global marks, filenodes

    final = []
    for path, fid in files.iteritems():
        kind = tree.kind(fid)

        h = tree.get_file_sha1(fid)

        if kind == 'symlink':
            d = tree.get_symlink_target(fid)
            mode = '120000'
        elif kind == 'file':

            if tree.is_executable(fid):
                mode = '100755'
            else:
                mode = '100644'

            # is the blob already exported?
            if h in filenodes:
                mark = filenodes[h]
                final.append((mode, mark, path))
                continue

            d = tree.get_file_text(fid)
        elif kind == 'directory':
            continue
        else:
            # Let die() do the formatting: a pre-formatted message would be
            # %-formatted a second time and fail on a path containing '%'.
            die("Unhandled kind '%s' for path '%s'", kind, path)

        mark = marks.next_mark()
        filenodes[h] = mark

        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(d))
        print(d)

        final.append((mode, mark, path))

    return final
def export_branch(branch, name):
    """Emit 'commit' commands for every not yet marked revision of `branch`.

    Revisions are walked forward starting after the tip recorded for
    `name`.  Afterwards the ref '<prefix>/heads/<name>' is reset to the
    branch's last revision and that revision is recorded as the new tip.
    """
    global prefix

    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    # Release the read lock even when exporting a revision fails.
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        count = 0

        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]

        for revid in revs:

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            time = rev.timestamp
            tz = rev.timezone
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz))
            authors = rev.get_apparent_authors()
            if authors:
                author = authors[0].encode('utf-8')
                author = "%s %u %s" % (fixup_user(author), time, gittz(tz))
            else:
                author = committer
            msg = rev.message.encode('utf-8')

            msg += '\n'

            # File changes are computed against the first parent only.
            if len(parents) == 0:
                parent = bzrlib.revision.NULL_REVISION
            else:
                parent = parents[0]

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            modified_final = export_files(cur_tree, modified)

            if len(parents) == 0:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            for f in removed:
                print("D %s" % (f,))
            for f in modified_final:
                print("M %s :%u %s" % f)
            print("")

            count += 1
            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print("")

    marks.set_tip(name, revid)
def export_tag(repo, name):
    """Emit a 'reset' pointing '<prefix>/tags/<name>' at the tagged revision.

    The revision id is looked up in the global `tags` dictionary and
    translated to its mark; `repo` is not used.
    """
    global tags, prefix

    print("reset %s" % ('%s/tags/%s' % (prefix, name)))
    tagged_mark = rev_to_mark(tags[name])
    print("from :%u" % tagged_mark)
    print("")
def do_import(parser):
    """Handle a batch of 'import <ref>' commands.

    Announces the 'done' and marks features first (import-marks only when
    the marks file already exists), then exports every requested branch
    or tag ref and finishes the stream with 'done'.
    """
    global dirname

    branch = parser.repo
    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    # Ref namespace -> function exporting a ref of that namespace.
    exporters = (
        ('refs/heads/', export_branch),
        ('refs/tags/', export_tag),
    )

    while parser.check('import'):
        ref = parser[1]
        for namespace, export in exporters:
            if ref.startswith(namespace):
                export(branch, ref[len(namespace):])
        parser.next()

    print('done')

    sys.stdout.flush()
def parse_blob(parser):
    """Consume a 'blob' command and keep its data in the global blob_marks.

    The parser is positioned on the 'blob' line on entry; the mark line
    and the data line that follow are read in that order.
    """
    global blob_marks

    parser.next()                       # -> mark line
    blob_id = parser.get_mark()
    parser.next()                       # -> data line
    blob_marks[blob_id] = parser.get_data()
    parser.next()
class CustomTree():
    """In-memory description of a commit's file updates.

    `files` maps each touched path to a dictionary holding either
    {'mode', 'data'} or {'deleted': True}.  The first parent's
    path -> file id mapping serves as the base; these mappings are kept
    per revision id in the global `files_cache`.
    """

    def __init__(self, repo, revid, parents, files):
        global files_cache

        self.repo = repo
        self.revid = revid
        self.parents = parents
        self.updates = {}

        def copy_tree(revid):
            # Snapshot path -> file id of an existing revision and cache it.
            snapshot = files_cache[revid] = {}
            tree = repo.repository.revision_tree(revid)
            repo.lock_read()
            try:
                for entry_path, entry in tree.iter_entries_by_dir():
                    snapshot[entry_path] = entry.file_id
            finally:
                repo.unlock()
            return snapshot

        if parents:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)
        else:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}

        # self.files is the very object stored in the cache, so entries
        # added or removed later by iter_changes() show up there as well.
        self.files = files_cache[revid] = self.base_files.copy()

        for path, f in files.iteritems():
            # Reuse the id of a known path, otherwise generate a new one.
            fid = self.files.get(path, None) or bzrlib.generate_ids.gen_file_id(path)
            f['path'] = path
            self.updates[fid] = f

    def last_revision(self):
        """Return the id of the base (first parent) revision."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples describing self.updates.

        Each tuple has the layout (file_id, (old_path, new_path), True,
        (old_versioned, new_versioned), (old_parent, new_parent),
        (old_name, new_name), (old_kind, new_kind),
        (old_executable, new_executable)).
        """
        changes = []

        def kind_and_exec(kind, mode):
            # Derive (kind, executable) from a file mode string.
            if mode == '100755':
                return kind, True
            if mode == '120000':
                return 'symlink', False
            return kind, False

        def get_parent(dirname, basename):
            for known in (self.base_files, self.files):
                parent_fid = known.get(dirname, None)
                if parent_fid:
                    return parent_fid
            if basename == '':
                return None
            # NOTE: `path` is the loop variable of the enclosing
            # iter_changes(), i.e. the file currently being processed.
            fid = bzrlib.generate_ids.gen_file_id(path)
            add_entry(fid, dirname, 'directory')
            return fid

        def record(fid, old_path, path, was_versioned, kind, mode):
            dirname, basename = os.path.split(path)
            # May first record missing parent directories.
            parent_fid = get_parent(dirname, basename)
            kind, executable = kind_and_exec(kind, mode)

            change = (fid,
                    (old_path, path),
                    True,
                    (was_versioned, True),
                    (None, parent_fid),
                    (None, basename),
                    (None, kind),
                    (None, executable))
            self.files[path] = change[0]
            changes.append(change)
            return change

        def add_entry(fid, path, kind, mode = None):
            return record(fid, None, path, False, kind, mode)

        def update_entry(fid, path, kind, mode = None):
            return record(fid, path, path, True, kind, mode)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            change = (fid,
                    (path, None),
                    True,
                    (True, False),
                    (parent_fid, None),
                    (None, None),
                    (None, None),
                    (None, None))
            del self.files[path]
            changes.append(change)
            return change

        for fid, f in self.updates.iteritems():
            path = f['path']

            if 'deleted' in f:
                remove_entry(fid, path, 'file')
            elif path in self.base_files:
                update_entry(fid, path, 'file', f['mode'])
            else:
                add_entry(fid, path, 'file', f['mode'])

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object over the update's data, None)."""
        content = self.updates[file_id]['data']
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the update's data, which holds the link target."""
        return self.updates[file_id]['data']
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string enclosed in double quotes has its escape sequences decoded
    and the quotes removed; anything else is returned unchanged.  Strings
    shorter than two characters cannot be a quoted pair and are returned
    as-is (an empty string used to raise IndexError).
    """
    if len(string) >= 2 and string[0] == string[-1] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume a 'commit' command and create the matching bzr revision.

    Only 'refs/heads/master' is accepted.  The new revision id is recorded
    in the global `parsed_refs` for the ref and in `marks` under the
    commit's mark.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    author = parser.get_author()
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline
    # (endswith() also copes with an empty commit message)
    if data.endswith('\n'):
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            # Let die() format, so a '%' in the line cannot break reporting.
            die('Unknown file command: %s', line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Roll the half-built commit back, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command for 'refs/heads/master'.

    A reset directly followed by a commit is handed over to
    parse_commit(); a reset with a 'from' line records the referenced
    revision in the global `parsed_refs`; anything else is ignored.
    """
    global parsed_refs

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        target = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(target)
def do_export(parser):
    """Handle an 'export' command: import the stream and update the branch.

    'blob', 'commit' and 'reset' commands are processed up to the 'done'
    line; 'tag' and 'feature' commands are accepted and ignored.  For
    'refs/heads/master' the branch history is then regenerated and either
    pushed to the peer branch or, when there is no peer, the working tree
    is updated.  One 'ok <ref>' or 'error <ref> ...' line is reported per
    ref, followed by a blank line.
    """
    global parsed_refs, dirname, peer

    parser.next()

    for line in parser.each_block('done'):
        if parser.check('blob'):
            parse_blob(parser)
        elif parser.check('commit'):
            parse_commit(parser)
        elif parser.check('reset'):
            parse_reset(parser)
        elif parser.check('tag'):
            pass
        elif parser.check('feature'):
            pass
        else:
            # Let die() format, so a '%' in the line cannot break reporting.
            die('unhandled export command: %s', line)

    repo = parser.repo

    for ref, revid in parsed_refs.iteritems():
        if ref == 'refs/heads/master':
            repo.generate_revision_history(revid, marks.get_tip('master'))
            if peer:
                try:
                    repo.push(peer, stop_revision=revid)
                except bzrlib.errors.DivergedBranches:
                    print("error %s non-fast forward" % ref)
                    continue
            else:
                wt = repo.bzrdir.open_workingtree()
                wt.update()
        print("ok %s" % ref)

    print("")
def do_capabilities(parser):
    """Print the helper's capabilities, terminated by a blank line.

    '*import-marks' is only announced when the git marks file already
    exists; '*export-marks' is always announced.
    """
    global dirname

    lines = [
        "import",
        "export",
        "refspec refs/heads/*:%s/heads/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]

    marks_file = os.path.join(dirname, 'marks-git')
    if os.path.exists(marks_file):
        lines.append("*import-marks %s" % marks_file)
    lines.append("*export-marks %s" % marks_file)

    # The trailing empty entry produces the terminating blank line.
    lines.append("")
    for text in lines:
        print(text)
def ref_is_valid(name):
    """Tell whether `name` is free of the characters '~', '^', ':', ' ' and '\\'."""
    for forbidden in '~^: \\':
        if forbidden in name:
            return False
    return True
def do_list(parser):
    """Print the list of refs: the master branch, the usable tags and HEAD.

    A tag is skipped when its revision cannot be resolved in the branch
    or when its name fails ref_is_valid().  Listed tags are remembered in
    the global `tags` dictionary (tag name -> revision id).
    """
    global tags
    print("? refs/heads/%s" % 'master')

    branch = parser.repo
    branch.lock_read()
    # Release the read lock even if looking up a tag fails unexpectedly.
    try:
        for tag, revid in branch.tags.get_tag_dict().items():
            try:
                branch.revision_id_to_dotted_revno(revid)
            except bzrlib.errors.NoSuchRevision:
                continue
            if not ref_is_valid(tag):
                continue
            print("? refs/tags/%s" % tag)
            tags[tag] = revid
    finally:
        branch.unlock()
    print("@refs/heads/%s HEAD" % 'master')
    print("")
def get_repo(url, alias):
    """Open the branch at `url` and return the branch to operate on.

    When the origin uses a local transport its branch is returned directly
    and the global `peer` is set to None.  Otherwise a clone under
    '<dirname>/clone' is used: pulled into if it exists, else created and
    bound to the remote branch; `peer` is then the remote branch.
    `alias` is not used.
    """
    global dirname, peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return branch

    remote_branch = branch
    clone_path = os.path.join(dirname, 'clone')

    if os.path.exists(clone_path):
        # pull
        clone_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        branch = clone_dir.open_branch()
        branch.pull(remote_branch, [], None, False)
    else:
        # clone
        clone_dir = origin.sprout(clone_path, None,
                hardlink=True, create_tree_if_local=False,
                source_branch=remote_branch)
        branch = clone_dir.open_branch()
        branch.bind(remote_branch)

    peer = remote_branch
    return branch
def fix_path(alias, orig_url):
    """Rewrite a relative file URL of remote `alias` as an absolute one.

    Nothing happens for URLs with a scheme other than 'file' or with an
    absolute path.  Otherwise 'remote.<alias>.url' is set through
    'git config' to 'bzr::' plus the URL joined onto the current
    directory.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    if parsed.scheme == 'file' and not os.path.isabs(parsed.path):
        base = "%s/" % os.getcwd()
        abs_url = urlparse.urljoin(base, orig_url)
        subprocess.call([
            'git', 'config',
            'remote.%s.url' % alias,
            "bzr::%s" % abs_url,
        ])
def main(args):
    """Entry point: serve commands read from stdin.

    args[1] is the remote alias and args[2] the URL.  When the alias is
    just the URL with a five character prefix, the remote is treated as
    temporary: its state lives in a directory named after the SHA-1 of
    the alias.  All per-run state lives in module globals.
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    # Initialise the globals before touching `args`, so that the atexit
    # hook finds `marks` defined even when the arguments are missing.
    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None

    alias = args[1]
    url = args[2]

    if alias[5:] == url:
        is_tmp = True
        alias = hashlib.sha1(alias).hexdigest()
    else:
        is_tmp = False

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks_path = os.path.join(dirname, 'marks-int')
    marks = Marks(marks_path)

    parser = Parser(repo)
    for line in parser:
        if parser.check('capabilities'):
            do_capabilities(parser)
        elif parser.check('list'):
            do_list(parser)
        elif parser.check('import'):
            do_import(parser)
        elif parser.check('export'):
            do_export(parser)
        else:
            # Let die() format, so a '%' in the line cannot break reporting.
            die('unhandled command: %s', line)
        sys.stdout.flush()
def bye():
    """Exit hook: persist the marks, or drop a temporary remote's directory.

    Does nothing as long as the global `marks` has not been set up.
    """
    if marks:
        if is_tmp:
            shutil.rmtree(dirname)
        else:
            marks.store()
# Register the cleanup hook first so it also runs when main() exits early.
atexit.register(bye)
exit_status = main(sys.argv)
sys.exit(exit_status)