remote-bzr: cleanup CustomTree
[git/jrn.git] / contrib / remote-helpers / git-remote-bzr
blob7452a572c1f50a37e52364838a761d0b4e4ae371
1 #!/usr/bin/env python
3 # Copyright (c) 2012 Felipe Contreras
7 # Just copy to your ~/bin, or anywhere in your $PATH.
8 # Then you can clone with:
9 # % git clone bzr::/path/to/bzr/repo/or/url
11 # For example:
12 # % git clone bzr::$HOME/myrepo
13 # or
14 # % git clone bzr::lp:myrepo
17 import sys
19 import bzrlib
20 if hasattr(bzrlib, "initialize"):
21 bzrlib.initialize()
23 import bzrlib.plugin
24 bzrlib.plugin.load_plugins()
26 import bzrlib.generate_ids
27 import bzrlib.transport
28 import bzrlib.errors
29 import bzrlib.ui
31 import sys
32 import os
33 import json
34 import re
35 import StringIO
36 import atexit, shutil, hashlib, urlparse, subprocess
# Leading run of text without angle brackets; used for user strings that
# have no "<email>" part.
NAME_RE = re.compile(r'^([^<>]+)')
# "Name <email>" user string; the name (group 1) is optional, the e-mail is
# group 2.
AUTHOR_RE = re.compile(r'^([^<>]+?)? ?<([^<>]*)>$')
# Ident line of a fast-export stream:
# "<keyword> <name> <email> <seconds> <+hhmm|-hhmm>".
RAW_AUTHOR_RE = re.compile(r'^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)')
def die(msg, *args):
    """Write 'ERROR: <msg>' to stderr and exit the process with status 1.

    msg is %-formatted with args only when args are supplied. Callers in
    this file pass messages that are already formatted (and may contain
    file paths), so a stray '%' in such a message must not be interpreted
    as a format directive.
    """
    if args:
        msg = msg % args
    sys.stderr.write('ERROR: %s\n' % msg)
    sys.exit(1)
def warn(msg, *args):
    """Write 'WARNING: <msg>' to stderr without terminating.

    msg is %-formatted with args only when args are supplied, so an
    already-formatted message containing '%' is printed verbatim instead
    of raising a formatting error.
    """
    if args:
        msg = msg % args
    sys.stderr.write('WARNING: %s\n' % msg)
def gittz(tz):
    """Format a UTC offset given in seconds as a '+hhmm' / '-hhmm' string.

    The sign is handled separately from the magnitude: floor division and
    modulo on a negative offset would turn -12600 (-03:30) into '-0430',
    and an offset between -3600 and 0 would lose its minus sign.

    A tz of None is treated as 0.
    NOTE(review): presumably a revision can lack a timezone; confirm
    against the bzrlib Revision API.
    """
    seconds = int(tz or 0)
    sign = '-' if seconds < 0 else '+'
    hours, remainder = divmod(abs(seconds), 3600)
    return '%s%02d%02d' % (sign, hours, remainder // 60)
class Marks:
    """Persistent mapping between revision ids and fast-import marks.

    The state (branch tips, rev -> mark table, last mark handed out) is
    kept in a JSON file at `path`; it is read on construction and written
    back by store().
    """

    def __init__(self, path):
        self.path = path
        self.tips = {}        # branch name -> revision id of the last export
        self.marks = {}       # revision id -> mark
        self.rev_marks = {}   # mark -> revision id (inverse of self.marks)
        self.last_mark = 0
        self.load()

    def load(self):
        """Read the JSON state from self.path; do nothing if it is absent."""
        if not os.path.exists(self.path):
            return

        # Close the handle deterministically instead of leaking it.
        with open(self.path) as f:
            tmp = json.load(f)

        self.tips = tmp['tips']
        self.marks = tmp['marks']
        self.last_mark = tmp['last-mark']

        # Only the forward table is stored; rebuild the inverse.
        for rev, mark in self.marks.items():
            self.rev_marks[mark] = rev

    def dict(self):
        """Return the serialisable state as a plain dictionary."""
        return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark }

    def store(self):
        """Write the current state to self.path as JSON."""
        with open(self.path, 'w') as f:
            json.dump(self.dict(), f)

    def __str__(self):
        return str(self.dict())

    def from_rev(self, rev):
        """Return the mark of `rev`; raises KeyError if it has none."""
        return self.marks[rev]

    def to_rev(self, mark):
        """Return the revision id for `mark`; raises KeyError if unknown."""
        return self.rev_marks[mark]

    def next_mark(self):
        """Allocate and return a fresh mark that is not tied to a revision."""
        self.last_mark += 1
        return self.last_mark

    def get_mark(self, rev):
        """Allocate a fresh mark, associate it with `rev` and return it."""
        self.last_mark += 1
        self.marks[rev] = self.last_mark
        # Keep the inverse table in step, as new_mark() does, so to_rev()
        # works for this mark without a store()/load() round trip.
        self.rev_marks[self.last_mark] = rev
        return self.last_mark

    def is_marked(self, rev):
        """Return True if `rev` already has a mark."""
        return rev in self.marks

    def new_mark(self, rev, mark):
        """Record an externally chosen `mark` for `rev`."""
        self.marks[rev] = mark
        self.rev_marks[mark] = rev
        self.last_mark = mark

    def get_tip(self, branch):
        """Return the recorded tip of `branch`, or None."""
        return self.tips.get(branch, None)

    def set_tip(self, branch, tip):
        """Record `tip` as the tip of `branch`."""
        self.tips[branch] = tip
class Parser:
    """Line-oriented reader of the command stream arriving on stdin.

    The current line is kept, stripped, in self.line; the helpers below
    inspect it or advance to the next one.
    """

    def __init__(self, repo):
        self.repo = repo
        self.line = self.get_line()

    def get_line(self):
        """Read one line from stdin and return it stripped."""
        return sys.stdin.readline().strip()

    def __getitem__(self, i):
        """Return the i-th whitespace-separated word of the current line."""
        return self.line.split()[i]

    def check(self, word):
        """Return True if the current line starts with `word`."""
        return self.line.startswith(word)

    def each_block(self, separator):
        """Yield lines, advancing after each, until one equals `separator`."""
        while self.line != separator:
            yield self.line
            self.line = self.get_line()

    def __iter__(self):
        # A block of commands ends at the first empty line.
        return self.each_block('')

    def next(self):
        """Advance to the next line; a 'done' line is stored as None."""
        self.line = self.get_line()
        if self.line == 'done':
            self.line = None

    def get_mark(self):
        """Return the integer following the first ':' on the current line."""
        i = self.line.index(':') + 1
        return int(self.line[i:])

    def get_data(self):
        """Read the payload announced by a 'data <size>' line.

        Returns None when the current line is not a data line; otherwise
        reads exactly <size> characters from stdin and returns them.
        """
        if not self.check('data'):
            return None
        i = self.line.index(' ') + 1
        size = int(self.line[i:])
        return sys.stdin.read(size)

    def get_author(self):
        """Parse an ident line into (user, timestamp, tz_seconds).

        Returns None when the current line does not match RAW_AUTHOR_RE.
        The zone is given as +hhmm/-hhmm; the digits are converted first and
        the sign applied afterwards, because floor division on a negative
        value such as -0330 would yield -02:50 instead of -03:30.
        """
        m = RAW_AUTHOR_RE.match(self.line)
        if not m:
            return None
        _, name, email, date, tz = m.groups()
        committer = '%s <%s>' % (name, email)
        hhmm = int(tz[1:])
        seconds = (hhmm // 100) * 3600 + (hhmm % 100) * 60
        if tz[0] == '-':
            seconds = -seconds
        return (committer, int(date), seconds)
def rev_to_mark(rev):
    """Look up the mark recorded for revision id `rev` in the global marks.

    The lookup is delegated to marks.from_rev().
    """
    return marks.from_rev(rev)
def mark_to_rev(mark):
    """Look up the revision id recorded for `mark` in the global marks.

    The lookup is delegated to marks.to_rev().
    """
    return marks.to_rev(mark)
def fixup_user(user):
    """Normalise a user string into 'name <email>' form.

    Double quotes are dropped. A string of the form 'Name <email>' is split
    with AUTHOR_RE; otherwise the leading bracket-free text (NAME_RE) is
    taken as the name. A part that cannot be determined is replaced by a
    placeholder ('unknown' for the name, 'Unknown' for the e-mail) instead
    of being rendered as the literal text 'None'.
    """
    name = mail = None
    user = user.replace('"', '')
    m = AUTHOR_RE.match(user)
    if m:
        name = m.group(1)
        mail = m.group(2).strip()
    else:
        m = NAME_RE.match(user)
        if m:
            name = m.group(1).strip()

    # An explicitly empty e-mail ('Name <>') is kept as is; only parts that
    # are missing altogether get a placeholder.
    if name is None:
        name = 'unknown'
    if mail is None:
        mail = 'Unknown'

    return '%s <%s>' % (name, mail)
def get_filechanges(cur, prev):
    """Summarise the differences between tree `cur` and tree `prev`.

    Returns a pair (modified, removed) of dictionaries keyed by
    UTF-8-encoded path. `modified` maps each added, modified or renamed-to
    path to its file id; `removed` maps each removed or renamed-from path
    to None. When a directory is renamed, every non-directory entry listed
    below its new location is reported as modified.
    """
    def utf8(text):
        return text.encode('utf-8')

    delta = cur.changes_from(prev)
    modified = {}
    removed = {}

    for new_path, file_id, _kind in delta.added:
        modified[utf8(new_path)] = file_id

    for old_path, _file_id, _kind in delta.removed:
        removed[utf8(old_path)] = None

    for changed_path, file_id, _kind, _text_mod, _meta_mod in delta.modified:
        modified[utf8(changed_path)] = file_id

    for old_path, new_path, file_id, kind, _text_mod, _meta_mod in delta.renamed:
        removed[utf8(old_path)] = None
        if kind != 'directory':
            modified[utf8(new_path)] = file_id
            continue
        # The old directory path is dropped as a whole, so re-announce every
        # file found under the new one.
        listing = cur.list_files(from_dir=new_path, recursive=True)
        for child, _file_class, child_kind, child_id, _entry in listing:
            if child_kind != 'directory':
                modified[utf8(new_path + '/' + child)] = child_id

    return modified, removed
def export_files(tree, files):
    """Print fast-import 'blob' commands for `files` and describe them.

    `files` maps path -> file id. For every symlink or regular file a
    (mode, mark, path) tuple is appended to the returned list; directories
    are skipped and any other kind aborts through die(). The content of a
    regular file whose SHA-1 was already seen (global `filenodes`) is not
    printed again; the mark recorded for it is reused.
    """
    exported = []
    for path, fid in files.items():
        kind = tree.kind(fid)
        sha = tree.get_file_sha1(fid)

        if kind == 'directory':
            continue

        if kind == 'symlink':
            payload = tree.get_symlink_target(fid)
            mode = '120000'
        elif kind == 'file':
            mode = '100755' if tree.is_executable(fid) else '100644'

            # Blob already exported: only reference its mark.
            if sha in filenodes:
                exported.append((mode, filenodes[sha], path))
                continue

            payload = tree.get_file_text(fid)
        else:
            die("Unhandled kind '%s' for path '%s'" % (kind, path))

        mark = marks.next_mark()
        filenodes[sha] = mark

        # NOTE(review): the announced size is len() of the payload as
        # returned by the tree; confirm it is a byte string for symlinks.
        print("blob")
        print("mark :%u" % mark)
        print("data %d" % len(payload))
        print(payload)

        exported.append((mode, mark, path))

    return exported
def export_branch(branch, name):
    """Print fast-import commands for the unexported revisions of `branch`.

    Revisions are walked forward from the tip recorded for `name` in the
    global marks (exclusive); those that already have a mark are skipped.
    For each remaining revision the changed blobs are printed, followed by
    a 'commit' command on <prefix>/heads/<name>. Finally the ref is reset
    to the branch's last revision and that revision is recorded as the new
    tip.
    """
    global prefix

    ref = '%s/heads/%s' % (prefix, name)
    tip = marks.get_tip(name)

    repo = branch.repository
    repo.lock_read()
    # Release the read lock even if exporting a revision fails.
    try:
        revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward')
        count = 0

        revs = [revid for revid, _, _, _ in revs if not marks.is_marked(revid)]

        for revid in revs:

            rev = repo.get_revision(revid)

            parents = rev.parent_ids
            timestamp = rev.timestamp
            tz = rev.timezone
            committer = rev.committer.encode('utf-8')
            committer = "%s %u %s" % (fixup_user(committer), timestamp, gittz(tz))
            authors = rev.get_apparent_authors()
            if authors:
                author = authors[0].encode('utf-8')
                author = "%s %u %s" % (fixup_user(author), timestamp, gittz(tz))
            else:
                author = committer
            msg = rev.message.encode('utf-8')

            msg += '\n'

            # File changes are computed against the first parent only.
            if len(parents) == 0:
                parent = bzrlib.revision.NULL_REVISION
            else:
                parent = parents[0]

            cur_tree = repo.revision_tree(revid)
            prev = repo.revision_tree(parent)
            modified, removed = get_filechanges(cur_tree, prev)

            modified_final = export_files(cur_tree, modified)

            if len(parents) == 0:
                print('reset %s' % ref)

            print("commit %s" % ref)
            print("mark :%d" % (marks.get_mark(revid)))
            print("author %s" % (author))
            print("committer %s" % (committer))
            print("data %d" % (len(msg)))
            print(msg)

            for i, p in enumerate(parents):
                try:
                    m = rev_to_mark(p)
                except KeyError:
                    # ghost?
                    continue
                if i == 0:
                    print("from :%s" % m)
                else:
                    print("merge :%s" % m)

            for f in removed:
                print("D %s" % (f,))
            for f in modified_final:
                print("M %s :%u %s" % f)
            print("")

            count += 1
            if (count % 100 == 0):
                print("progress revision %s (%d/%d)" % (revid, count, len(revs)))
                print("#############################################################")
    finally:
        repo.unlock()

    revid = branch.last_revision()

    # make sure the ref is updated
    print("reset %s" % ref)
    print("from :%u" % rev_to_mark(revid))
    print("")

    marks.set_tip(name, revid)
def export_tag(repo, name):
    """Print a fast-import 'reset' placing tag `name` on its revision's mark.

    The revision is taken from the global `tags` table; `repo` is accepted
    but not used.
    """
    tag_ref = '%s/tags/%s' % (prefix, name)
    print("reset %s" % tag_ref)
    tagged_rev = tags[name]
    print("from :%u" % rev_to_mark(tagged_rev))
    print("")
def do_import(parser):
    """Handle a batch of 'import <ref>' commands.

    Announces the fast-import features (marks file under the helper's
    directory), exports every requested branch or tag ref, then prints
    'done'.
    """
    heads = 'refs/heads/'
    tag_refs = 'refs/tags/'

    branch = parser.repo
    marks_file = os.path.join(dirname, 'marks-git')

    print("feature done")
    # Marks can only be imported once a previous run has written them.
    if os.path.exists(marks_file):
        print("feature import-marks=%s" % marks_file)
    print("feature export-marks=%s" % marks_file)
    sys.stdout.flush()

    while parser.check('import'):
        ref = parser[1]
        if ref.startswith(heads):
            export_branch(branch, ref[len(heads):])
        elif ref.startswith(tag_refs):
            export_tag(branch, ref[len(tag_refs):])
        parser.next()

    print('done')

    sys.stdout.flush()
def parse_blob(parser):
    """Consume a 'blob' command and remember its data under its mark.

    The payload is stored in the global `blob_marks` table, keyed by the
    integer mark.
    """
    parser.next()                  # move from 'blob' to the mark line
    mark = parser.get_mark()
    parser.next()                  # move to the data line
    blob_marks[mark] = parser.get_data()
    parser.next()
class CustomTree():
    """In-memory description of the tree produced by one imported commit.

    The path -> file id table of the first parent (`base_files`) is taken
    from the global `files_cache` or read from the repository, and the
    per-path updates of the commit are recorded on top of it. The object
    supplies what parse_commit() hands to the commit builder:
    last_revision(), iter_changes() and the content accessors.
    """

    def __init__(self, repo, revid, parents, files):
        global files_cache

        self.updates = {}

        def copy_tree(tree_revid):
            # Read the path -> file id table of an existing revision and
            # cache it.
            table = files_cache[tree_revid] = {}
            tree = repo.repository.revision_tree(tree_revid)
            repo.lock_read()
            try:
                for entry_path, entry in tree.iter_entries_by_dir():
                    table[entry_path] = entry.file_id
            finally:
                repo.unlock()
            return table

        if parents:
            self.base_id = parents[0]
            self.base_files = files_cache.get(self.base_id, None)
            if not self.base_files:
                self.base_files = copy_tree(self.base_id)
        else:
            self.base_id = bzrlib.revision.NULL_REVISION
            self.base_files = {}

        # The new revision starts out as a copy of its base.
        self.files = files_cache[revid] = self.base_files.copy()

        for path, update in files.items():
            fid = self.files.get(path, None)
            if not fid:
                fid = bzrlib.generate_ids.gen_file_id(path)
            update['path'] = path
            self.updates[fid] = update

    def last_revision(self):
        """Return the id of the revision this tree is based on."""
        return self.base_id

    def iter_changes(self):
        """Return the list of change tuples describing the recorded updates.

        Each tuple holds the file id followed by (old, new) pairs for path,
        versioned state, parent id, name, kind and executable bit, with a
        changed-content flag in third position.
        """
        changes = []

        def get_parent(dirname, basename):
            # Id of the directory containing an entry; a directory known
            # neither to the base nor to this tree is created on the fly.
            for table in (self.base_files, self.files):
                parent_fid = table.get(dirname, None)
                if parent_fid:
                    return parent_fid
            if basename == '':
                return None
            # `path` is the path of the update currently being processed by
            # the loop at the end of this method.
            fid = bzrlib.generate_ids.gen_file_id(path)
            add_entry(fid, dirname, 'directory')
            return fid

        def record(fid, path, kind, mode, existed):
            # Shared body of add_entry/update_entry; they differ only in the
            # old path and the old versioned flag.
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)

            executable = (mode == '100755')
            if mode == '120000':
                kind = 'symlink'

            old_path = path if existed else None
            change = (fid,
                      (old_path, path),
                      True,
                      (existed, True),
                      (None, parent_fid),
                      (None, basename),
                      (None, kind),
                      (None, executable))
            self.files[path] = fid
            changes.append(change)

        def add_entry(fid, path, kind, mode=None):
            record(fid, path, kind, mode, False)

        def update_entry(fid, path, kind, mode=None):
            record(fid, path, kind, mode, True)

        def remove_entry(fid, path, kind):
            dirname, basename = os.path.split(path)
            parent_fid = get_parent(dirname, basename)
            nothing = (None, None)
            change = (fid,
                      (path, None),
                      True,
                      (True, False),
                      (parent_fid, None),
                      nothing,
                      nothing,
                      nothing)
            del self.files[path]
            changes.append(change)

        for fid, update in self.updates.items():
            path = update['path']

            if 'deleted' in update:
                remove_entry(fid, path, 'file')
            elif path in self.base_files:
                update_entry(fid, path, 'file', update['mode'])
            else:
                add_entry(fid, path, 'file', update['mode'])

        return changes

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object over the new content, None)."""
        content = self.updates[file_id]['data']
        return (StringIO.StringIO(content), None)

    def get_symlink_target(self, file_id):
        """Return the recorded data of `file_id`, used as the link target."""
        return self.updates[file_id]['data']
def c_style_unescape(string):
    """Undo C-style quoting of a path.

    A string enclosed in double quotes is decoded with the 'string-escape'
    codec and stripped of the quotes; anything else, including the empty
    string, is returned unchanged.
    """
    # Slices instead of indexes so that an empty string does not raise
    # IndexError.
    if string[:1] == string[-1:] == '"':
        return string.decode('string-escape')[1:-1]
    return string
def parse_commit(parser):
    """Consume one 'commit' command and create the matching bzr revision.

    Only refs/heads/master is accepted. The committer identity, message,
    parents ('from' / 'merge' marks) and file commands ('M' and 'D') are
    read from the stream, the changes are recorded through a CustomTree
    and committed with the repository's commit builder. The new revision
    id is stored in the global `parsed_refs` and registered in `marks`
    under the commit's mark.
    """
    global marks, blob_marks, parsed_refs

    parents = []

    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    commit_mark = parser.get_mark()
    parser.next()
    # The author line is skipped; only the committer identity is used.
    parser.next()
    committer = parser.get_author()
    parser.next()
    data = parser.get_data()
    parser.next()
    if parser.check('from'):
        parents.append(parser.get_mark())
        parser.next()
    while parser.check('merge'):
        parents.append(parser.get_mark())
        parser.next()

    # fast-export adds an extra newline; an empty message has nothing to
    # strip.
    if data and data[-1] == '\n':
        data = data[:-1]

    files = {}

    for line in parser:
        if parser.check('M'):
            t, m, mark_ref, path = line.split(' ', 3)
            mark = int(mark_ref[1:])
            f = { 'mode' : m, 'data' : blob_marks[mark] }
        elif parser.check('D'):
            # Split once only: the path itself may contain spaces.
            t, path = line.split(' ', 1)
            f = { 'deleted' : True }
        else:
            die('Unknown file command: %s' % line)
        path = c_style_unescape(path).decode('utf-8')
        files[path] = f

    repo = parser.repo

    committer, date, tz = committer
    parents = [str(mark_to_rev(p)) for p in parents]
    revid = bzrlib.generate_ids.gen_revision_id(committer, date)
    props = {}
    props['branch-nick'] = repo.nick

    mtree = CustomTree(repo, revid, parents, files)
    changes = mtree.iter_changes()

    repo.lock_write()
    try:
        builder = repo.get_commit_builder(parents, None, date, tz, committer, props, revid)
        try:
            list(builder.record_iter_changes(mtree, mtree.last_revision(), changes))
            builder.finish_inventory()
            builder.commit(data.decode('utf-8', 'replace'))
        except Exception:
            # Abort the half-built commit, then let the error propagate.
            builder.abort()
            raise
    finally:
        repo.unlock()

    parsed_refs[ref] = revid
    marks.new_mark(revid, commit_mark)
def parse_reset(parser):
    """Consume a 'reset' command for refs/heads/master.

    A reset immediately followed by a commit is handed over to
    parse_commit(); a reset carrying a 'from' line records the referenced
    revision in the global `parsed_refs`; a bare reset is ignored.
    """
    ref = parser[1]
    parser.next()

    if ref != 'refs/heads/master':
        die("bzr doesn't support multiple branches; use 'master'")

    # ugh
    if parser.check('commit'):
        parse_commit(parser)
    elif parser.check('from'):
        target = parser.get_mark()
        parser.next()
        parsed_refs[ref] = mark_to_rev(target)
def do_export(parser):
    """Handle an 'export' command: read the stream and update the branch.

    Blob, commit and reset commands are dispatched to their parsers until
    'done'; 'tag' and 'feature' lines are ignored. Afterwards the master
    branch is moved to the parsed revision and either pushed to the peer
    or, without a peer, the working tree is updated. One status line is
    printed per ref, followed by an empty line.
    """
    global parsed_refs, dirname, peer

    parser.next()

    handlers = (
        ('blob', parse_blob),
        ('commit', parse_commit),
        ('reset', parse_reset),
    )

    for line in parser.each_block('done'):
        for keyword, handler in handlers:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            if not (parser.check('tag') or parser.check('feature')):
                die('unhandled export command: %s' % line)

    repo = parser.repo

    for ref, revid in parsed_refs.items():
        if ref == 'refs/heads/master':
            repo.generate_revision_history(revid, marks.get_tip('master'))
            if peer:
                try:
                    repo.push(peer, stop_revision=revid)
                except bzrlib.errors.DivergedBranches:
                    print("error %s non-fast forward" % ref)
                    continue
            else:
                repo.bzrdir.open_workingtree().update()
        print("ok %s" % ref)

    print("")
def do_capabilities(parser):
    """Print the capabilities of this helper, terminated by an empty line.

    Import/export support and the refspecs under the global `prefix` are
    always announced; '*import-marks' only when the marks file exists.
    """
    marks_file = os.path.join(dirname, 'marks-git')

    lines = [
        "import",
        "export",
        "refspec refs/heads/*:%s/heads/*" % prefix,
        "refspec refs/tags/*:%s/tags/*" % prefix,
    ]
    if os.path.exists(marks_file):
        lines.append("*import-marks %s" % marks_file)
    lines.append("*export-marks %s" % marks_file)
    lines.append("")

    for text in lines:
        print(text)
def ref_is_valid(name):
    """Return True unless `name` contains '~', '^', ':', a space or a backslash."""
    return not any(forbidden in name for forbidden in '~^: \\')
def do_list(parser):
    """Print the refs offered by the repository, ending with an empty line.

    'master' is always listed. Tags are listed when their revision can be
    resolved on the branch and their name passes ref_is_valid(); each
    listed tag is also recorded in the global `tags` table.
    """
    global tags
    print("? refs/heads/%s" % 'master')

    branch = parser.repo
    branch.lock_read()
    # Release the read lock even if resolving a tag fails unexpectedly.
    try:
        for tag, revid in branch.tags.get_tag_dict().items():
            try:
                branch.revision_id_to_dotted_revno(revid)
            except bzrlib.errors.NoSuchRevision:
                continue
            if not ref_is_valid(tag):
                continue
            print("? refs/tags/%s" % tag)
            tags[tag] = revid
    finally:
        branch.unlock()
    print("@refs/heads/%s HEAD" % 'master')
    print("")
def get_repo(url, alias):
    """Open the branch at `url` and return the branch to operate on.

    When the origin's transport is a local one, the branch is returned
    directly and the global `peer` is set to None. Otherwise a clone under
    the helper's directory is used: it is pulled if it already exists, or
    created (and bound to the remote branch) if not; `peer` is then set to
    the remote branch.
    """
    global dirname, peer

    origin = bzrlib.bzrdir.BzrDir.open(url)
    branch = origin.open_branch()

    if isinstance(origin.transport, bzrlib.transport.local.LocalTransport):
        peer = None
        return branch

    remote_branch = branch
    clone_path = os.path.join(dirname, 'clone')

    if os.path.exists(clone_path):
        # pull
        local_dir = bzrlib.bzrdir.BzrDir.open(clone_path)
        branch = local_dir.open_branch()
        branch.pull(remote_branch, [], None, False)
    else:
        # clone
        local_dir = origin.sprout(clone_path, None,
                                  hardlink=True, create_tree_if_local=False,
                                  source_branch=remote_branch)
        branch = local_dir.open_branch()
        branch.bind(remote_branch)

    peer = remote_branch
    return branch
def fix_path(alias, orig_url):
    """Rewrite a relative file URL of remote `alias` into an absolute one.

    Does nothing unless `orig_url` parses to the 'file' scheme with a
    non-absolute path. Otherwise the URL is joined onto the current
    working directory and written to remote.<alias>.url through
    'git config'.
    """
    parsed = urlparse.urlparse(orig_url, 'file')
    if parsed.scheme == 'file' and not os.path.isabs(parsed.path):
        base = "%s/" % os.getcwd()
        abs_url = urlparse.urljoin(base, orig_url)
        subprocess.call(['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url])
def main(args):
    """Run the remote helper: args[1] is the remote alias, args[2] the URL.

    Initialises the module-level state, opens the repository, loads the
    marks and then serves commands read from stdin (capabilities, list,
    import, export) until an empty line; any other command aborts through
    die().
    """
    global marks, prefix, dirname
    global tags, filenodes
    global blob_marks
    global parsed_refs
    global files_cache
    global is_tmp

    alias, url = args[1], args[2]

    tags = {}
    filenodes = {}
    blob_marks = {}
    parsed_refs = {}
    files_cache = {}
    marks = None

    # An alias of the form '<5 characters><url>' is not a configured
    # remote; it is treated as temporary and given a hashed name.
    is_tmp = (alias[5:] == url)
    if is_tmp:
        alias = hashlib.sha1(alias).hexdigest()

    prefix = 'refs/bzr/%s' % alias
    gitdir = os.environ['GIT_DIR']
    dirname = os.path.join(gitdir, 'bzr', alias)

    if not is_tmp:
        fix_path(alias, url)

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bzrlib.ui.ui_factory.be_quiet(True)

    repo = get_repo(url, alias)

    marks = Marks(os.path.join(dirname, 'marks-int'))

    commands = (
        ('capabilities', do_capabilities),
        ('list', do_list),
        ('import', do_import),
        ('export', do_export),
    )

    parser = Parser(repo)
    for line in parser:
        for keyword, handler in commands:
            if parser.check(keyword):
                handler(parser)
                break
        else:
            die('unhandled command: %s' % line)
        sys.stdout.flush()
def bye():
    """Exit hook: persist the marks, or drop a temporary helper directory.

    Does nothing when no marks object exists. That includes the case where
    main() failed before it assigned the global `marks` at all, which would
    otherwise raise NameError while the interpreter is shutting down.
    """
    table = globals().get('marks')
    if not table:
        return
    if not is_tmp:
        table.store()
    else:
        shutil.rmtree(dirname)
# Register the exit hook before running, so that it also fires when main()
# leaves through sys.exit().
atexit.register(bye)
exit_status = main(sys.argv)
sys.exit(exit_status)